mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-17 21:35:07 +00:00
LoopVectorize: Add support for floating point min/max reductions
Add support for min/max reductions when "no-nans-fp-math" is enabled. This allows us to assume we have ordered floating point math and treat ordered and unordered predicates equally. radar://13723044 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181144 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e79d92c592
commit
87defd0924
@ -335,7 +335,7 @@ public:
|
|||||||
DominatorTree *DT, TargetTransformInfo* TTI,
|
DominatorTree *DT, TargetTransformInfo* TTI,
|
||||||
AliasAnalysis *AA, TargetLibraryInfo *TLI)
|
AliasAnalysis *AA, TargetLibraryInfo *TLI)
|
||||||
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
|
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI),
|
||||||
Induction(0) {}
|
Induction(0), HasFunNoNaNAttr(false) {}
|
||||||
|
|
||||||
/// This enum represents the kinds of reductions that we support.
|
/// This enum represents the kinds of reductions that we support.
|
||||||
enum ReductionKind {
|
enum ReductionKind {
|
||||||
@ -347,7 +347,8 @@ public:
|
|||||||
RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
|
RK_IntegerXor, ///< Bitwise or logical XOR of numbers.
|
||||||
RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
|
RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()).
|
||||||
RK_FloatAdd, ///< Sum of floats.
|
RK_FloatAdd, ///< Sum of floats.
|
||||||
RK_FloatMult ///< Product of floats.
|
RK_FloatMult, ///< Product of floats.
|
||||||
|
RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()).
|
||||||
};
|
};
|
||||||
|
|
||||||
/// This enum represents the kinds of inductions that we support.
|
/// This enum represents the kinds of inductions that we support.
|
||||||
@ -365,7 +366,9 @@ public:
|
|||||||
MRK_UIntMin,
|
MRK_UIntMin,
|
||||||
MRK_UIntMax,
|
MRK_UIntMax,
|
||||||
MRK_SIntMin,
|
MRK_SIntMin,
|
||||||
MRK_SIntMax
|
MRK_SIntMax,
|
||||||
|
MRK_FloatMin,
|
||||||
|
MRK_FloatMax
|
||||||
};
|
};
|
||||||
|
|
||||||
/// This POD struct holds information about reduction variables.
|
/// This POD struct holds information about reduction variables.
|
||||||
@ -586,6 +589,8 @@ private:
|
|||||||
/// We need to check that all of the pointers in this list are disjoint
|
/// We need to check that all of the pointers in this list are disjoint
|
||||||
/// at runtime.
|
/// at runtime.
|
||||||
RuntimePointerCheck PtrRtCheck;
|
RuntimePointerCheck PtrRtCheck;
|
||||||
|
/// Can we assume the absence of NaNs.
|
||||||
|
bool HasFunNoNaNAttr;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// LoopVectorizationCostModel - estimates the expected speedups due to
|
/// LoopVectorizationCostModel - estimates the expected speedups due to
|
||||||
@ -1648,6 +1653,8 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
|
|||||||
return Instruction::FAdd;
|
return Instruction::FAdd;
|
||||||
case LoopVectorizationLegality::RK_IntegerMinMax:
|
case LoopVectorizationLegality::RK_IntegerMinMax:
|
||||||
return Instruction::ICmp;
|
return Instruction::ICmp;
|
||||||
|
case LoopVectorizationLegality::RK_FloatMinMax:
|
||||||
|
return Instruction::FCmp;
|
||||||
default:
|
default:
|
||||||
llvm_unreachable("Unknown reduction operation");
|
llvm_unreachable("Unknown reduction operation");
|
||||||
}
|
}
|
||||||
@ -1672,8 +1679,21 @@ Value *createMinMaxOp(IRBuilder<> &Builder,
|
|||||||
break;
|
break;
|
||||||
case LoopVectorizationLegality::MRK_SIntMax:
|
case LoopVectorizationLegality::MRK_SIntMax:
|
||||||
P = CmpInst::ICMP_SGT;
|
P = CmpInst::ICMP_SGT;
|
||||||
|
break;
|
||||||
|
case LoopVectorizationLegality::MRK_FloatMin:
|
||||||
|
P = CmpInst::FCMP_OLT;
|
||||||
|
break;
|
||||||
|
case LoopVectorizationLegality::MRK_FloatMax:
|
||||||
|
P = CmpInst::FCMP_OGT;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
Value *Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
|
|
||||||
|
Value *Cmp;
|
||||||
|
if (RK == LoopVectorizationLegality::MRK_FloatMin || RK == LoopVectorizationLegality::MRK_FloatMax)
|
||||||
|
Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
|
||||||
|
else
|
||||||
|
Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
|
||||||
|
|
||||||
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
|
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
|
||||||
return Select;
|
return Select;
|
||||||
}
|
}
|
||||||
@ -1743,11 +1763,12 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||||||
// one for multiplication, -1 for And.
|
// one for multiplication, -1 for And.
|
||||||
Value *Identity;
|
Value *Identity;
|
||||||
Value *VectorStart;
|
Value *VectorStart;
|
||||||
if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax)
|
if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
|
||||||
|
RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
|
||||||
// MinMax reductions have the start value as their identity.
|
// MinMax reductions have the start value as their identity.
|
||||||
VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
|
VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
|
||||||
"minmax.ident");
|
"minmax.ident");
|
||||||
else {
|
} else {
|
||||||
Constant *Iden =
|
Constant *Iden =
|
||||||
LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
|
LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
|
||||||
VecTy->getScalarType());
|
VecTy->getScalarType());
|
||||||
@ -1801,7 +1822,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||||||
Value *ReducedPartRdx = RdxParts[0];
|
Value *ReducedPartRdx = RdxParts[0];
|
||||||
unsigned Op = getReductionBinOp(RdxDesc.Kind);
|
unsigned Op = getReductionBinOp(RdxDesc.Kind);
|
||||||
for (unsigned part = 1; part < UF; ++part) {
|
for (unsigned part = 1; part < UF; ++part) {
|
||||||
if (Op != Instruction::ICmp)
|
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
|
||||||
ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
|
ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
|
||||||
RdxParts[part], ReducedPartRdx,
|
RdxParts[part], ReducedPartRdx,
|
||||||
"bin.rdx");
|
"bin.rdx");
|
||||||
@ -1832,7 +1853,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
|
|||||||
ConstantVector::get(ShuffleMask),
|
ConstantVector::get(ShuffleMask),
|
||||||
"rdx.shuf");
|
"rdx.shuf");
|
||||||
|
|
||||||
if (Op != Instruction::ICmp)
|
if (Op != Instruction::ICmp && Op != Instruction::FCmp)
|
||||||
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
|
TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
|
||||||
"bin.rdx");
|
"bin.rdx");
|
||||||
else
|
else
|
||||||
@ -2363,6 +2384,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Look for the attribute signaling the absence of NaNs.
|
||||||
|
Function &F = *Header->getParent();
|
||||||
|
if (F.hasFnAttribute("no-nans-fp-math"))
|
||||||
|
HasFunNoNaNAttr = F.getAttributes().getAttribute(
|
||||||
|
AttributeSet::FunctionIndex,
|
||||||
|
"no-nans-fp-math").getValueAsString() == "true";
|
||||||
|
|
||||||
// For each block in the loop.
|
// For each block in the loop.
|
||||||
for (Loop::block_iterator bb = TheLoop->block_begin(),
|
for (Loop::block_iterator bb = TheLoop->block_begin(),
|
||||||
be = TheLoop->block_end(); bb != be; ++bb) {
|
be = TheLoop->block_end(); bb != be; ++bb) {
|
||||||
@ -2444,6 +2472,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
|
|||||||
DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
|
DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (AddReductionVar(Phi, RK_FloatMinMax)) {
|
||||||
|
DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<"\n");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
|
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
|
||||||
return false;
|
return false;
|
||||||
@ -2869,7 +2901,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
|
|||||||
// such that we don't stop when we see the phi has two uses (one by the select
|
// such that we don't stop when we see the phi has two uses (one by the select
|
||||||
// and one by the icmp) and to make sure we only see exactly the two
|
// and one by the icmp) and to make sure we only see exactly the two
|
||||||
// instructions.
|
// instructions.
|
||||||
unsigned NumICmpSelectPatternInst = 0;
|
unsigned NumCmpSelectPatternInst = 0;
|
||||||
ReductionInstDesc ReduxDesc(false, 0);
|
ReductionInstDesc ReduxDesc(false, 0);
|
||||||
|
|
||||||
// Avoid cycles in the chain.
|
// Avoid cycles in the chain.
|
||||||
@ -2918,7 +2950,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
|
|||||||
|
|
||||||
// We can't have multiple inside users except for a combination of
|
// We can't have multiple inside users except for a combination of
|
||||||
// icmp/select both using the phi.
|
// icmp/select both using the phi.
|
||||||
if (FoundInBlockUser && !NumICmpSelectPatternInst)
|
if (FoundInBlockUser && !NumCmpSelectPatternInst)
|
||||||
return false;
|
return false;
|
||||||
FoundInBlockUser = true;
|
FoundInBlockUser = true;
|
||||||
|
|
||||||
@ -2927,14 +2959,15 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
|
|||||||
if (!ReduxDesc.IsReduction)
|
if (!ReduxDesc.IsReduction)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) ||
|
if (Kind == RK_IntegerMinMax && (isa<ICmpInst>(U) || isa<SelectInst>(U)))
|
||||||
isa<SelectInst>(U)))
|
++NumCmpSelectPatternInst;
|
||||||
++NumICmpSelectPatternInst;
|
if (Kind == RK_FloatMinMax && (isa<FCmpInst>(U) || isa<SelectInst>(U)))
|
||||||
|
++NumCmpSelectPatternInst;
|
||||||
|
|
||||||
// Reductions of instructions such as Div, and Sub is only
|
// Reductions of instructions such as Div, and Sub is only
|
||||||
// possible if the LHS is the reduction variable.
|
// possible if the LHS is the reduction variable.
|
||||||
if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) &&
|
if (!U->isCommutative() && !isa<PHINode>(U) && !isa<SelectInst>(U) &&
|
||||||
!isa<ICmpInst>(U) && U->getOperand(0) != Iter)
|
!isa<ICmpInst>(U) && !isa<FCmpInst>(U) && U->getOperand(0) != Iter)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
Iter = ReduxDesc.PatternLastInst;
|
Iter = ReduxDesc.PatternLastInst;
|
||||||
@ -2942,7 +2975,8 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
|
|||||||
|
|
||||||
// This means we have seen one but not the other instruction of the
|
// This means we have seen one but not the other instruction of the
|
||||||
// pattern or more than just a select and cmp.
|
// pattern or more than just a select and cmp.
|
||||||
if (Kind == RK_IntegerMinMax && NumICmpSelectPatternInst != 2)
|
if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
|
||||||
|
NumCmpSelectPatternInst != 2)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// We found a reduction var if we have reached the original
|
// We found a reduction var if we have reached the original
|
||||||
@ -2968,16 +3002,17 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
|
|||||||
/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
|
/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
|
||||||
/// pattern corresponding to a min(X, Y) or max(X, Y).
|
/// pattern corresponding to a min(X, Y) or max(X, Y).
|
||||||
LoopVectorizationLegality::ReductionInstDesc
|
LoopVectorizationLegality::ReductionInstDesc
|
||||||
LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, ReductionInstDesc &Prev) {
|
LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
|
||||||
|
ReductionInstDesc &Prev) {
|
||||||
|
|
||||||
assert((isa<ICmpInst>(I) || isa<SelectInst>(I)) &&
|
assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
|
||||||
"Expect a select instruction");
|
"Expect a select instruction");
|
||||||
ICmpInst *Cmp = 0;
|
Instruction *Cmp = 0;
|
||||||
SelectInst *Select = 0;
|
SelectInst *Select = 0;
|
||||||
|
|
||||||
// We must handle the select(cmp()) as a single instruction. Advance to the
|
// We must handle the select(cmp()) as a single instruction. Advance to the
|
||||||
// select.
|
// select.
|
||||||
if ((Cmp = dyn_cast<ICmpInst>(I))) {
|
if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
|
||||||
if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin())))
|
if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->use_begin())))
|
||||||
return ReductionInstDesc(false, I);
|
return ReductionInstDesc(false, I);
|
||||||
return ReductionInstDesc(Select, Prev.MinMaxKind);
|
return ReductionInstDesc(Select, Prev.MinMaxKind);
|
||||||
@ -2986,7 +3021,8 @@ LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, ReductionIns
|
|||||||
// Only handle single use cases for now.
|
// Only handle single use cases for now.
|
||||||
if (!(Select = dyn_cast<SelectInst>(I)))
|
if (!(Select = dyn_cast<SelectInst>(I)))
|
||||||
return ReductionInstDesc(false, I);
|
return ReductionInstDesc(false, I);
|
||||||
if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))))
|
if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
|
||||||
|
!(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
|
||||||
return ReductionInstDesc(false, I);
|
return ReductionInstDesc(false, I);
|
||||||
if (!Cmp->hasOneUse())
|
if (!Cmp->hasOneUse())
|
||||||
return ReductionInstDesc(false, I);
|
return ReductionInstDesc(false, I);
|
||||||
@ -3003,6 +3039,14 @@ LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, ReductionIns
|
|||||||
return ReductionInstDesc(Select, MRK_SIntMax);
|
return ReductionInstDesc(Select, MRK_SIntMax);
|
||||||
else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
|
else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
|
||||||
return ReductionInstDesc(Select, MRK_SIntMin);
|
return ReductionInstDesc(Select, MRK_SIntMin);
|
||||||
|
else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
|
||||||
|
return ReductionInstDesc(Select, MRK_FloatMin);
|
||||||
|
else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
|
||||||
|
return ReductionInstDesc(Select, MRK_FloatMax);
|
||||||
|
else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
|
||||||
|
return ReductionInstDesc(Select, MRK_FloatMin);
|
||||||
|
else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
|
||||||
|
return ReductionInstDesc(Select, MRK_FloatMax);
|
||||||
|
|
||||||
return ReductionInstDesc(false, I);
|
return ReductionInstDesc(false, I);
|
||||||
}
|
}
|
||||||
@ -3017,7 +3061,8 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
|
|||||||
default:
|
default:
|
||||||
return ReductionInstDesc(false, I);
|
return ReductionInstDesc(false, I);
|
||||||
case Instruction::PHI:
|
case Instruction::PHI:
|
||||||
if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd))
|
if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd &&
|
||||||
|
Kind != RK_FloatMinMax))
|
||||||
return ReductionInstDesc(false, I);
|
return ReductionInstDesc(false, I);
|
||||||
return ReductionInstDesc(I, Prev.MinMaxKind);
|
return ReductionInstDesc(I, Prev.MinMaxKind);
|
||||||
case Instruction::Sub:
|
case Instruction::Sub:
|
||||||
@ -3035,9 +3080,11 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
|
|||||||
return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
|
return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I);
|
||||||
case Instruction::FAdd:
|
case Instruction::FAdd:
|
||||||
return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
|
return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I);
|
||||||
|
case Instruction::FCmp:
|
||||||
case Instruction::ICmp:
|
case Instruction::ICmp:
|
||||||
case Instruction::Select:
|
case Instruction::Select:
|
||||||
if (Kind != RK_IntegerMinMax)
|
if (Kind != RK_IntegerMinMax &&
|
||||||
|
(!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
|
||||||
return ReductionInstDesc(false, I);
|
return ReductionInstDesc(false, I);
|
||||||
return isMinMaxSelectCmpPattern(I, Prev);
|
return isMinMaxSelectCmpPattern(I, Prev);
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
|
|
||||||
@A = common global [1024 x i32] zeroinitializer, align 16
|
@A = common global [1024 x i32] zeroinitializer, align 16
|
||||||
|
@fA = common global [1024 x float] zeroinitializer, align 16
|
||||||
|
@dA = common global [1024 x double] zeroinitializer, align 16
|
||||||
|
|
||||||
; Signed tests.
|
; Signed tests.
|
||||||
|
|
||||||
@ -403,3 +405,481 @@ for.body:
|
|||||||
for.end:
|
for.end:
|
||||||
ret i32 %max.red.0
|
ret i32 %max.red.0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Float tests.
|
||||||
|
|
||||||
|
; Maximum.
|
||||||
|
|
||||||
|
; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
|
||||||
|
; CHECK: @max_red_float
|
||||||
|
; CHECK: fcmp ogt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp ogt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @max_red_float(float %max) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp ogt float %0, %max.red.08
|
||||||
|
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %max.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @max_red_float_ge
|
||||||
|
; CHECK: fcmp oge <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp ogt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @max_red_float_ge(float %max) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp oge float %0, %max.red.08
|
||||||
|
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %max.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @inverted_max_red_float
|
||||||
|
; CHECK: fcmp olt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp ogt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @inverted_max_red_float(float %max) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp olt float %0, %max.red.08
|
||||||
|
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %max.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @inverted_max_red_float_le
|
||||||
|
; CHECK: fcmp ole <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp ogt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @inverted_max_red_float_le(float %max) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp ole float %0, %max.red.08
|
||||||
|
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %max.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @unordered_max_red
|
||||||
|
; CHECK: fcmp ugt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp ogt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @unordered_max_red_float(float %max) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp ugt float %0, %max.red.08
|
||||||
|
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %max.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @unordered_max_red_float_ge
|
||||||
|
; CHECK: fcmp uge <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp ogt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @unordered_max_red_float_ge(float %max) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp uge float %0, %max.red.08
|
||||||
|
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %max.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @inverted_unordered_max_red
|
||||||
|
; CHECK: fcmp ult <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp ogt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @inverted_unordered_max_red_float(float %max) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp ult float %0, %max.red.08
|
||||||
|
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %max.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @inverted_unordered_max_red_float_le
|
||||||
|
; CHECK: fcmp ule <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp ogt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @inverted_unordered_max_red_float_le(float %max) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp ule float %0, %max.red.08
|
||||||
|
%max.red.0 = select i1 %cmp3, float %max.red.08, float %0
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %max.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; Minimum.
|
||||||
|
|
||||||
|
; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
|
||||||
|
; CHECK: @min_red_float
|
||||||
|
; CHECK: fcmp olt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp olt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @min_red_float(float %min) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp olt float %0, %min.red.08
|
||||||
|
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %min.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @min_red_float_le
|
||||||
|
; CHECK: fcmp ole <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp olt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @min_red_float_le(float %min) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp ole float %0, %min.red.08
|
||||||
|
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %min.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @inverted_min_red_float
|
||||||
|
; CHECK: fcmp ogt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp olt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @inverted_min_red_float(float %min) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp ogt float %0, %min.red.08
|
||||||
|
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %min.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @inverted_min_red_float_ge
|
||||||
|
; CHECK: fcmp oge <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp olt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @inverted_min_red_float_ge(float %min) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp oge float %0, %min.red.08
|
||||||
|
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %min.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @unordered_min_red
|
||||||
|
; CHECK: fcmp ult <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp olt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
define float @unordered_min_red_float(float %min) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp ult float %0, %min.red.08
|
||||||
|
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %min.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @unordered_min_red_float_le
|
||||||
|
; CHECK: fcmp ule <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp olt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
; Test input: a 1024-iteration loop over @fA that carries a running float value
; in %min.red.08 (seeded from the %min argument). Each iteration loads one
; element, compares it with the unordered predicate `ule`, and selects the
; loaded element when the compare is true, otherwise the running value.
; The function is declared with attribute group #0.
define float @unordered_min_red_float_le(float %min) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp ule float %0, %min.red.08
|
||||||
|
%min.red.0 = select i1 %cmp3, float %0, float %min.red.08
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %min.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @inverted_unordered_min_red
|
||||||
|
; CHECK: fcmp ugt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp olt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
; Test input: a 1024-iteration loop over @fA that carries a running float value
; in %min.red.08 (seeded from the %min argument). "Inverted" form: the compare
; uses the unordered predicate `ugt` (loaded element vs. running value) and the
; select operands are swapped, so the running value is kept when the compare is
; true and the loaded element is taken otherwise.
; The function is declared with attribute group #0.
define float @inverted_unordered_min_red_float(float %min) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp ugt float %0, %min.red.08
|
||||||
|
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %min.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: @inverted_unordered_min_red_float_ge
|
||||||
|
; CHECK: fcmp uge <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp olt <2 x float>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
; Test input: a 1024-iteration loop over @fA that carries a running float value
; in %min.red.08 (seeded from the %min argument). "Inverted" form: the compare
; uses the unordered predicate `uge` (loaded element vs. running value) and the
; select operands are swapped, so the running value is kept when the compare is
; true and the loaded element is taken otherwise.
; The function is declared with attribute group #0.
define float @inverted_unordered_min_red_float_ge(float %min) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp uge float %0, %min.red.08
|
||||||
|
%min.red.0 = select i1 %cmp3, float %min.red.08, float %0
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %min.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
; Make sure we handle doubles, too.
|
||||||
|
; CHECK: @min_red_double
|
||||||
|
; CHECK: fcmp olt <2 x double>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
; CHECK: middle.block
|
||||||
|
; CHECK: fcmp olt <2 x double>
|
||||||
|
; CHECK: select <2 x i1>
|
||||||
|
|
||||||
|
; Test input: the double-precision variant. A 1024-iteration loop over @dA
; carries a running double value in %min.red.08 (seeded from the %min
; argument). Each iteration loads one element, compares it with the ordered
; predicate `olt`, and selects the loaded element when the compare is true,
; otherwise the running value.
; The function is declared with attribute group #0.
define double @min_red_double(double %min) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x double]* @dA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load double* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp olt double %0, %min.red.08
|
||||||
|
%min.red.0 = select i1 %cmp3, double %0, double %min.red.08
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret double %min.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; Don't turn this into a max reduction. The no-nans-fp-math attribute is missing.
|
||||||
|
; CHECK: @max_red_float_nans
|
||||||
|
; CHECK-NOT: <2 x float>
|
||||||
|
|
||||||
|
; Negative test input: a 1024-iteration loop over @fA that carries a running
; float value in %max.red.08 (seeded from the %max argument). Each iteration
; loads one element, compares it with the ordered predicate `ogt`, and selects
; the loaded element when the compare is true, otherwise the running value.
; Note that this define carries no attribute group: there is no #0 on it.
define float @max_red_float_nans(float %max) {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body:
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp3 = fcmp ogt float %0, %max.red.08
|
||||||
|
%max.red.0 = select i1 %cmp3, float %0, float %max.red.08
|
||||||
|
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv.next, 1024
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret float %max.red.0
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; Attribute group #0: sets the string attribute "no-nans-fp-math" to "true" on
; every function declared with #0.
attributes #0 = { "no-nans-fp-math"="true" }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user