mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 20:32:21 +00:00
LoopVectorizer: Recognize min/max reductions
A min/max operation is represented by a select(cmp(lt/le/gt/ge, X, Y), X, Y) sequence in LLVM. If we see such a sequence we can treat it just as any other commutative binary instruction and reduce it. This appears to help bzip2 by about 1.5% on an imac12,2. radar://12960601 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179773 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
bff177676c
commit
a3fb330d05
@ -343,6 +343,7 @@ public:
|
||||
RK_IntegerOr, ///< Bitwise or logical OR of numbers.
|
||||
RK_IntegerAnd, ///< Bitwise or logical AND of numbers.
|
||||
RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
|
||||
RK_IntegerMinMax, //< Min/max implemented in terms of select(cmp()).
|
||||
RK_FloatAdd, ///< Sum of floats.
|
||||
RK_FloatMult ///< Product of floats.
|
||||
};
|
||||
@ -361,8 +362,9 @@ public:
|
||||
ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
|
||||
Kind(RK_NoReduction) {}
|
||||
|
||||
ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K)
|
||||
: StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
|
||||
ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K,
|
||||
CmpInst::Predicate P)
|
||||
: StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxPred(P) {}
|
||||
|
||||
// The starting value of the reduction.
|
||||
// It does not have to be zero!
|
||||
@ -371,6 +373,25 @@ public:
|
||||
Instruction *LoopExitInstr;
|
||||
// The kind of the reduction.
|
||||
ReductionKind Kind;
|
||||
// If this a min/max reduction the kind of reduction.
|
||||
CmpInst::Predicate MinMaxPred;
|
||||
};
|
||||
|
||||
/// This POD struct holds information about a potential reduction operation.
|
||||
struct ReductionInstDesc {
|
||||
ReductionInstDesc(bool IsRedux, Instruction *I) :
|
||||
IsReduction(IsRedux), PatternLastInst(I), Predicate(ICmpInst::ICMP_EQ) {}
|
||||
|
||||
ReductionInstDesc(Instruction *I, CmpInst::Predicate P) :
|
||||
IsReduction(true), PatternLastInst(I), Predicate(P) {}
|
||||
|
||||
// Is this instruction a reduction candidate.
|
||||
bool IsReduction;
|
||||
// The last instruction in a min/max pattern (select of the select(icmp())
|
||||
// pattern), or the current reduction instruction otherwise.
|
||||
Instruction *PatternLastInst;
|
||||
// If this is a min/max pattern the comparison predicate.
|
||||
CmpInst::Predicate Predicate;
|
||||
};
|
||||
|
||||
// This POD struct holds information about the memory runtime legality
|
||||
@ -487,9 +508,13 @@ private:
|
||||
/// Returns True, if 'Phi' is the kind of reduction variable for type
|
||||
/// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
|
||||
bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
|
||||
/// Returns true if the instruction I can be a reduction variable of type
|
||||
/// 'Kind'.
|
||||
bool isReductionInstr(Instruction *I, ReductionKind Kind);
|
||||
/// Returns a struct describing if the instruction 'I' can be a reduction
|
||||
/// variable of type 'Kind'. If the reduction is a min/max pattern of
|
||||
/// select(icmp()) this function advances the instruction pointer 'I' from the
|
||||
/// compare instruction to the select instruction and stores this pointer in
|
||||
/// 'PatternLastInst' member of the returned struct.
|
||||
ReductionInstDesc isReductionInstr(Instruction *I, ReductionKind Kind,
|
||||
ReductionInstDesc Desc);
|
||||
/// Returns the induction kind of Phi. This function may return NoInduction
|
||||
/// if the PHI is not an induction variable.
|
||||
InductionKind isInductionVariable(PHINode *Phi);
|
||||
@ -1437,7 +1462,8 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
|
||||
/// This function returns the identity element (or neutral element) for
|
||||
/// the operation K.
|
||||
static Constant*
|
||||
getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) {
|
||||
getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp,
|
||||
CmpInst::Predicate Pred) {
|
||||
switch (K) {
|
||||
case LoopVectorizationLegality:: RK_IntegerXor:
|
||||
case LoopVectorizationLegality:: RK_IntegerAdd:
|
||||
@ -1456,6 +1482,28 @@ getReductionIdentity(LoopVectorizationLegality::ReductionKind K, Type *Tp) {
|
||||
case LoopVectorizationLegality:: RK_FloatAdd:
|
||||
// Adding zero to a number does not change it.
|
||||
return ConstantFP::get(Tp, 0.0L);
|
||||
case LoopVectorizationLegality:: RK_IntegerMinMax:
|
||||
switch(Pred) {
|
||||
default: llvm_unreachable("Unknown min/max predicate");
|
||||
case CmpInst::ICMP_ULT:
|
||||
case CmpInst::ICMP_ULE:
|
||||
return ConstantInt::getAllOnesValue(Tp);
|
||||
case CmpInst::ICMP_UGT:
|
||||
case CmpInst::ICMP_UGE:
|
||||
return ConstantInt::get(Tp, 0);
|
||||
case CmpInst::ICMP_SLT:
|
||||
case CmpInst::ICMP_SLE: {
|
||||
unsigned BitWidth = Tp->getPrimitiveSizeInBits();
|
||||
return ConstantInt::get(Tp->getContext(),
|
||||
APInt::getSignedMaxValue(BitWidth));
|
||||
}
|
||||
case CmpInst::ICMP_SGT:
|
||||
case CmpInst::ICMP_SGE: {
|
||||
unsigned BitWidth = Tp->getPrimitiveSizeInBits();
|
||||
return ConstantInt::get(Tp->getContext(),
|
||||
APInt::getSignedMinValue(BitWidth));
|
||||
}
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("Unknown reduction kind");
|
||||
}
|
||||
@ -1566,7 +1614,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
|
||||
}
|
||||
|
||||
/// This function translates the reduction kind to an LLVM binary operator.
|
||||
static Instruction::BinaryOps
|
||||
static unsigned
|
||||
getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
|
||||
switch (Kind) {
|
||||
case LoopVectorizationLegality::RK_IntegerAdd:
|
||||
@ -1583,11 +1631,20 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
|
||||
return Instruction::FMul;
|
||||
case LoopVectorizationLegality::RK_FloatAdd:
|
||||
return Instruction::FAdd;
|
||||
case LoopVectorizationLegality::RK_IntegerMinMax:
|
||||
return Instruction::ICmp;
|
||||
default:
|
||||
llvm_unreachable("Unknown reduction operation");
|
||||
}
|
||||
}
|
||||
|
||||
Value *createMinMaxOp(IRBuilder<> &Builder, ICmpInst::Predicate P, Value *Left,
|
||||
Value *Right) {
|
||||
Value *Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
|
||||
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
|
||||
return Select;
|
||||
}
|
||||
|
||||
void
|
||||
InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
||||
//===------------------------------------------------===//
|
||||
@ -1651,7 +1708,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
||||
|
||||
// Find the reduction identity variable. Zero for addition, or, xor,
|
||||
// one for multiplication, -1 for And.
|
||||
Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType());
|
||||
Constant *Iden = getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType(),
|
||||
RdxDesc.MinMaxPred);
|
||||
Constant *Identity = ConstantVector::getSplat(VF, Iden);
|
||||
|
||||
// This vector is the Identity vector where the first element is the
|
||||
@ -1699,10 +1757,15 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
||||
|
||||
// Reduce all of the unrolled parts into a single vector.
|
||||
Value *ReducedPartRdx = RdxParts[0];
|
||||
unsigned Op = getReductionBinOp(RdxDesc.Kind);
|
||||
for (unsigned part = 1; part < UF; ++part) {
|
||||
Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
|
||||
ReducedPartRdx = Builder.CreateBinOp(Op, RdxParts[part], ReducedPartRdx,
|
||||
"bin.rdx");
|
||||
if (Op != Instruction::ICmp)
|
||||
ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
|
||||
RdxParts[part], ReducedPartRdx,
|
||||
"bin.rdx");
|
||||
else
|
||||
ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxPred,
|
||||
ReducedPartRdx, RdxParts[part]);
|
||||
}
|
||||
|
||||
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
|
||||
@ -1727,8 +1790,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
||||
ConstantVector::get(ShuffleMask),
|
||||
"rdx.shuf");
|
||||
|
||||
Instruction::BinaryOps Op = getReductionBinOp(RdxDesc.Kind);
|
||||
TmpVec = Builder.CreateBinOp(Op, TmpVec, Shuf, "bin.rdx");
|
||||
if (Op != Instruction::ICmp)
|
||||
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
|
||||
"bin.rdx");
|
||||
else
|
||||
TmpVec = createMinMaxOp(Builder, RdxDesc.MinMaxPred, TmpVec, Shuf);
|
||||
}
|
||||
|
||||
// The result is in the first element of the vector.
|
||||
@ -2315,6 +2381,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
||||
DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
|
||||
continue;
|
||||
}
|
||||
if (AddReductionVar(Phi, RK_IntegerMinMax)) {
|
||||
DEBUG(dbgs() << "LV: Found a MINMAX reduction PHI."<< *Phi <<"\n");
|
||||
continue;
|
||||
}
|
||||
if (AddReductionVar(Phi, RK_FloatMult)) {
|
||||
DEBUG(dbgs() << "LV: Found an FMult reduction PHI."<< *Phi <<"\n");
|
||||
continue;
|
||||
@ -2734,6 +2804,14 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
|
||||
// out-of-block user. The cycle must end with the original PHI.
|
||||
Instruction *Iter = Phi;
|
||||
|
||||
// To recognize min/max patterns formed by a icmp select sequence, we store
|
||||
// the number of instruction we saw from the recognized min/max pattern,
|
||||
// such that we don't stop when we see the phi has two uses (one by the select
|
||||
// and one by the icmp) and to make sure we only see exactly the two
|
||||
// instructions.
|
||||
unsigned NumICmpSelectPatternInst = 0;
|
||||
ReductionInstDesc ReduxDesc(false, 0);
|
||||
|
||||
// Avoid cycles in the chain.
|
||||
SmallPtrSet<Instruction *, 8> VisitedInsts;
|
||||
while (VisitedInsts.insert(Iter)) {
|
||||
@ -2778,23 +2856,35 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
|
||||
Iter->hasNUsesOrMore(2))
|
||||
continue;
|
||||
|
||||
// We can't have multiple inside users.
|
||||
if (FoundInBlockUser)
|
||||
// We can't have multiple inside users except for a combination of
|
||||
// icmp/select both using the phi.
|
||||
if (FoundInBlockUser && !NumICmpSelectPatternInst)
|
||||
return false;
|
||||
FoundInBlockUser = true;
|
||||
|
||||
// Any reduction instr must be of one of the allowed kinds.
|
||||
if (!isReductionInstr(U, Kind))
|
||||
ReduxDesc = isReductionInstr(U, Kind, ReduxDesc);
|
||||
if (!ReduxDesc.IsReduction)
|
||||
return false;
|
||||
|
||||
if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) ||
|
||||
isa<SelectInst>(U)))
|
||||
++NumICmpSelectPatternInst;
|
||||
|
||||
// Reductions of instructions such as Div, and Sub is only
|
||||
// possible if the LHS is the reduction variable.
|
||||
if (!U->isCommutative() && !isa<PHINode>(U) && U->getOperand(0) != Iter)
|
||||
if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) &&
|
||||
!isa<ICmpInst>(U) && U->getOperand(0) != Iter)
|
||||
return false;
|
||||
|
||||
Iter = U;
|
||||
Iter = ReduxDesc.PatternLastInst;
|
||||
}
|
||||
|
||||
// This means we have seen one but not the other instruction of the
|
||||
// pattern or more than just a select and cmp.
|
||||
if (Kind == RK_IntegerMinMax && NumICmpSelectPatternInst != 2)
|
||||
return false;
|
||||
|
||||
// We found a reduction var if we have reached the original
|
||||
// phi node and we only have a single instruction with out-of-loop
|
||||
// users.
|
||||
@ -2803,7 +2893,8 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
|
||||
AllowedExit.insert(ExitInstruction);
|
||||
|
||||
// Save the description of this reduction variable.
|
||||
ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
|
||||
ReductionDescriptor RD(RdxStart, ExitInstruction, Kind,
|
||||
ReduxDesc.Predicate);
|
||||
Reductions[Phi] = RD;
|
||||
// We've ended the cycle. This is a reduction variable if we have an
|
||||
// outside user and it has a binary op.
|
||||
@ -2814,36 +2905,120 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
static CmpInst::Predicate getPredicateSense(CmpInst::Predicate P,
|
||||
bool ShouldRevert) {
|
||||
if (!ShouldRevert) return P;
|
||||
|
||||
switch(P) {
|
||||
default:
|
||||
llvm_unreachable("Unknown predicate sense");
|
||||
case CmpInst::ICMP_UGT:
|
||||
case CmpInst::ICMP_UGE:
|
||||
return CmpInst::ICMP_ULT;
|
||||
case CmpInst::ICMP_SGT:
|
||||
case CmpInst::ICMP_SGE:
|
||||
return CmpInst::ICMP_SLT;
|
||||
case CmpInst::ICMP_ULT:
|
||||
case CmpInst::ICMP_ULE:
|
||||
return CmpInst::ICMP_UGT;
|
||||
case CmpInst::ICMP_SLT:
|
||||
case CmpInst::ICMP_SLE:
|
||||
return CmpInst::ICMP_SGT;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
|
||||
/// pattern corresponding to a min(X, Y) or max(X, Y).
|
||||
static LoopVectorizationLegality::ReductionInstDesc
|
||||
isMinMaxSelectCmpPattern(Instruction *I) {
|
||||
|
||||
assert((isa<ICmpInst>(I) || isa<SelectInst>(I)) &&
|
||||
"Expect a select instruction");
|
||||
ICmpInst *Cmp = 0;
|
||||
SelectInst *Select = 0;
|
||||
|
||||
// Look for a select(icmp(),...) pattern. Only handle integer reductions for
|
||||
// now.
|
||||
if ((Select = dyn_cast<SelectInst>(I))) {
|
||||
if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))))
|
||||
return LoopVectorizationLegality::ReductionInstDesc(false, I);
|
||||
// Only handle the single user case
|
||||
if (!Cmp->hasOneUse())
|
||||
return LoopVectorizationLegality::ReductionInstDesc(false, I);
|
||||
} else if ((Cmp = dyn_cast<ICmpInst>(I))) {
|
||||
// Only handle the single user case.
|
||||
if (!Cmp->hasOneUse())
|
||||
return LoopVectorizationLegality::ReductionInstDesc(false, I);
|
||||
// Look for the select.
|
||||
if (!(Select = dyn_cast<SelectInst>(*I->use_begin())))
|
||||
return LoopVectorizationLegality::ReductionInstDesc(false, I);
|
||||
// Compare must be the first operand of the select.
|
||||
if (Select->getOperand(0) != Cmp)
|
||||
return LoopVectorizationLegality::ReductionInstDesc(false, I);
|
||||
}
|
||||
|
||||
CmpInst::Predicate Pred = Cmp->getPredicate();
|
||||
|
||||
// Only (u/s)lt/gt/ge/le are min or max patterns.
|
||||
if (Pred == CmpInst::ICMP_EQ ||
|
||||
Pred == CmpInst::ICMP_NE)
|
||||
return LoopVectorizationLegality::ReductionInstDesc(false, I);
|
||||
|
||||
Value *SelectOp1 = Select->getOperand(1);
|
||||
Value *SelectOp2 = Select->getOperand(2);
|
||||
|
||||
Value *CmpLeft = Cmp->getOperand(0);
|
||||
Value *CmpRight = Cmp->getOperand(1);
|
||||
|
||||
// Can have reversed sense.
|
||||
// select(slt(X, Y), Y, X) == select(sge(X, Y), X, Y).
|
||||
bool IsInverted = (SelectOp2 == CmpLeft && SelectOp1 == CmpRight);
|
||||
bool IsMinMaxPattern = (SelectOp1 == CmpLeft && SelectOp2 == CmpRight) ||
|
||||
IsInverted;
|
||||
|
||||
// Advance the instruction pointer from the icmp to the select instruction.
|
||||
if (IsMinMaxPattern) {
|
||||
CmpInst::Predicate P = getPredicateSense(Pred, IsInverted);
|
||||
return LoopVectorizationLegality::ReductionInstDesc(Select, P);
|
||||
}
|
||||
|
||||
return LoopVectorizationLegality::ReductionInstDesc(false, I);
|
||||
}
|
||||
|
||||
LoopVectorizationLegality::ReductionInstDesc
|
||||
LoopVectorizationLegality::isReductionInstr(Instruction *I,
|
||||
ReductionKind Kind) {
|
||||
ReductionKind Kind,
|
||||
ReductionInstDesc Desc) {
|
||||
bool FP = I->getType()->isFloatingPointTy();
|
||||
bool FastMath = (FP && I->isCommutative() && I->isAssociative());
|
||||
|
||||
switch (I->getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
return ReductionInstDesc(false, I);
|
||||
case Instruction::PHI:
|
||||
if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd))
|
||||
return false;
|
||||
// possibly.
|
||||
return true;
|
||||
return ReductionInstDesc(false, I);
|
||||
return ReductionInstDesc(I, Desc.Predicate);
|
||||
case Instruction::Sub:
|
||||
case Instruction::Add:
|
||||
return Kind == RK_IntegerAdd;
|
||||
return ReductionInstDesc(Kind == RK_IntegerAdd, I);
|
||||
case Instruction::Mul:
|
||||
return Kind == RK_IntegerMult;
|
||||
return ReductionInstDesc(Kind == RK_IntegerMult, I);
|
||||
case Instruction::And:
|
||||
return Kind == RK_IntegerAnd;
|
||||
return ReductionInstDesc(Kind == RK_IntegerAnd, I);
|
||||
case Instruction::Or:
|
||||
return Kind == RK_IntegerOr;
|
||||
return ReductionInstDesc(Kind == RK_IntegerOr, I);
|
||||
case Instruction::Xor:
|
||||
return Kind == RK_IntegerXor;
|
||||
return ReductionInstDesc(Kind == RK_IntegerXor, I);
|
||||
case Instruction::FMul:
|
||||
return Kind == RK_FloatMult && FastMath;
|
||||
return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
|
||||
case Instruction::FAdd:
|
||||
return Kind == RK_FloatAdd && FastMath;
|
||||
}
|
||||
return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
|
||||
case Instruction::ICmp:
|
||||
case Instruction::Select:
|
||||
if (Kind != RK_IntegerMinMax)
|
||||
return ReductionInstDesc(false, I);
|
||||
return isMinMaxSelectCmpPattern(I);
|
||||
}
|
||||
}
|
||||
|
||||
LoopVectorizationLegality::InductionKind
|
||||
|
399
test/Transforms/LoopVectorize/minmax_reduction.ll
Normal file
399
test/Transforms/LoopVectorize/minmax_reduction.ll
Normal file
@ -0,0 +1,399 @@
|
||||
; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
|
||||
@A = common global [1024 x i32] zeroinitializer, align 16
|
||||
|
||||
; Signed tests.
|
||||
|
||||
; Turn this into a max reduction.
|
||||
; CHECK: @max_red
|
||||
; CHECK: icmp sgt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
; CHECK: middle.block
|
||||
; CHECK: icmp sgt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
|
||||
define i32 @max_red(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp3 = icmp sgt i32 %0, %max.red.08
|
||||
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; Turn this into a max reduction. The select has its inputs reversed therefore
|
||||
; this is a max reduction.
|
||||
; CHECK: @max_red_inverse_select
|
||||
; CHECK: icmp slt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
; CHECK: middle.block
|
||||
; CHECK: icmp sgt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
|
||||
define i32 @max_red_inverse_select(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp3 = icmp slt i32 %max.red.08, %0
|
||||
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; Turn this into a min reduction.
|
||||
; CHECK: @min_red
|
||||
; CHECK: icmp slt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
; CHECK: middle.block
|
||||
; CHECK: icmp slt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
|
||||
define i32 @min_red(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp3 = icmp slt i32 %0, %max.red.08
|
||||
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; Turn this into a min reduction. The select has its inputs reversed therefore
|
||||
; this is a min reduction.
|
||||
; CHECK: @min_red_inverse_select
|
||||
; CHECK: icmp sgt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
; CHECK: middle.block
|
||||
; CHECK: icmp slt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
|
||||
define i32 @min_red_inverse_select(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp3 = icmp sgt i32 %max.red.08, %0
|
||||
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; Unsigned tests.
|
||||
|
||||
; Turn this into a max reduction.
|
||||
; CHECK: @umax_red
|
||||
; CHECK: icmp ugt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
; CHECK: middle.block
|
||||
; CHECK: icmp ugt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
|
||||
define i32 @umax_red(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp3 = icmp ugt i32 %0, %max.red.08
|
||||
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; Turn this into a max reduction. The select has its inputs reversed therefore
|
||||
; this is a max reduction.
|
||||
; CHECK: @umax_red_inverse_select
|
||||
; CHECK: icmp ult <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
; CHECK: middle.block
|
||||
; CHECK: icmp ugt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
|
||||
define i32 @umax_red_inverse_select(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp3 = icmp ult i32 %max.red.08, %0
|
||||
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; Turn this into a min reduction.
|
||||
; CHECK: @umin_red
|
||||
; CHECK: icmp ult <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
; CHECK: middle.block
|
||||
; CHECK: icmp ult <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
|
||||
define i32 @umin_red(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp3 = icmp ult i32 %0, %max.red.08
|
||||
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; Turn this into a min reduction. The select has its inputs reversed therefore
|
||||
; this is a min reduction.
|
||||
; CHECK: @umin_red_inverse_select
|
||||
; CHECK: icmp ugt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
; CHECK: middle.block
|
||||
; CHECK: icmp ult <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
|
||||
define i32 @umin_red_inverse_select(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp3 = icmp ugt i32 %max.red.08, %0
|
||||
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; SGE -> SLT
|
||||
; Turn this into a min reduction (select inputs are reversed).
|
||||
; CHECK: @sge_min_red
|
||||
; CHECK: icmp sge <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
; CHECK: middle.block
|
||||
; CHECK: icmp slt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
|
||||
define i32 @sge_min_red(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp3 = icmp sge i32 %0, %max.red.08
|
||||
%max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; SLE -> SGT
|
||||
; Turn this into a max reduction (select inputs are reversed).
|
||||
; CHECK: @sle_min_red
|
||||
; CHECK: icmp sle <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
; CHECK: middle.block
|
||||
; CHECK: icmp sgt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
|
||||
define i32 @sle_min_red(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp3 = icmp sle i32 %0, %max.red.08
|
||||
%max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; UGE -> ULT
|
||||
; Turn this into a min reduction (select inputs are reversed).
|
||||
; CHECK: @uge_min_red
|
||||
; CHECK: icmp uge <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
; CHECK: middle.block
|
||||
; CHECK: icmp ult <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
|
||||
define i32 @uge_min_red(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp3 = icmp uge i32 %0, %max.red.08
|
||||
%max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; ULE -> UGT
|
||||
; Turn this into a max reduction (select inputs are reversed).
|
||||
; CHECK: @ule_min_red
|
||||
; CHECK: icmp ule <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
; CHECK: middle.block
|
||||
; CHECK: icmp ugt <2 x i32>
|
||||
; CHECK: select <2 x i1>
|
||||
|
||||
define i32 @ule_min_red(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%cmp3 = icmp ule i32 %0, %max.red.08
|
||||
%max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; No reduction.
|
||||
; CHECK: @no_red_1
|
||||
; CHECK-NOT: icmp <2 x i32>
|
||||
define i32 @no_red_1(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%arrayidx1 = getelementptr inbounds [1024 x i32]* @A, i64 1, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%1 = load i32* %arrayidx1, align 4
|
||||
%cmp3 = icmp sgt i32 %0, %1
|
||||
%max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
||||
|
||||
; CHECK: @no_red_2
|
||||
; CHECK-NOT: icmp <2 x i32>
|
||||
define i32 @no_red_2(i32 %max) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body:
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
%arrayidx1 = getelementptr inbounds [1024 x i32]* @A, i64 1, i64 %indvars.iv
|
||||
%0 = load i32* %arrayidx, align 4
|
||||
%1 = load i32* %arrayidx1, align 4
|
||||
%cmp3 = icmp sgt i32 %0, %max.red.08
|
||||
%max.red.0 = select i1 %cmp3, i32 %0, i32 %1
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 1024
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end:
|
||||
ret i32 %max.red.0
|
||||
}
|
Loading…
Reference in New Issue
Block a user