[InstCombine][CodeGenPrep] Create llvm.uadd.with.overflow in CGP.

Summary:
This change moves creating calls to `llvm.uadd.with.overflow` from
InstCombine to CodeGenPrep.  Combining overflow check patterns into
calls to the said intrinsic in InstCombine inhibits optimization because
it introduces an intrinsic call that not all other transforms and
analyses understand.

Depends on D8888.

Reviewers: majnemer, atrick

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D8889

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234638 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Sanjoy Das 2015-04-10 21:07:09 +00:00
parent 79db0a6fd9
commit 8aca90e5b6
6 changed files with 200 additions and 102 deletions

View File

@ -295,6 +295,9 @@ template <typename Class> struct bind_ty {
/// \brief Match a value, capturing it if we match.
inline bind_ty<Value> m_Value(Value *&V) { return V; }
/// \brief Match an instruction, capturing it if we match.
inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
/// \brief Match a binary operator, capturing it if we match.
inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
@ -1103,6 +1106,52 @@ m_UnordFMax(const LHS &L, const RHS &R) {
return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
}
//===----------------------------------------------------------------------===//
// Matchers for overflow check patterns: e.g. (a + b) u< a
//
template <typename LHS_t, typename RHS_t, typename Sum_t>
struct UAddWithOverflow_match {
LHS_t L;
RHS_t R;
Sum_t S;
UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
: L(L), R(R), S(S) {}
template <typename OpTy> bool match(OpTy *V) {
Value *ICmpLHS, *ICmpRHS;
ICmpInst::Predicate Pred;
if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
return false;
Value *AddLHS, *AddRHS;
auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
// (a + b) u< a, (a + b) u< b
if (Pred == ICmpInst::ICMP_ULT)
if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
// a >u (a + b), b >u (a + b)
if (Pred == ICmpInst::ICMP_UGT)
if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
return false;
}
};
/// \brief Match an icmp instruction checking for unsigned overflow on addition.
///
/// S is matched to the addition whose result is being checked for overflow, and
/// L and R are matched to the LHS and RHS of S.
template <typename LHS_t, typename RHS_t, typename Sum_t>
UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
}
/// \brief Match an 'unordered' floating point minimum function.
/// Floating point has one special value 'NaN'. Therefore, there is no total
/// order. However, if we can ignore the 'NaN' value (for example, because of a

View File

@ -747,13 +747,60 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
return SinkCast(CI);
}
/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// CombineUAddWithOverflow - try to combine CI into a call to the
/// llvm.uadd.with.overflow intrinsic if possible.
///
/// Return true if any changes were made.
static bool CombineUAddWithOverflow(CmpInst *CI) {
Value *A, *B;
Instruction *AddI;
if (!match(CI,
m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
return false;
Type *Ty = AddI->getType();
if (!isa<IntegerType>(Ty))
return false;
// We don't want to move around uses of condition values this late, so we we
// check if it is legal to create the call to the intrinsic in the basic
// block containing the icmp:
if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
return false;
#ifndef NDEBUG
// Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
// for now:
if (AddI->hasOneUse())
assert(*AddI->user_begin() == CI && "expected!");
#endif
Module *M = CI->getParent()->getParent()->getParent();
Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
auto *UAddWithOverflow =
CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
auto *Overflow =
ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
CI->replaceAllUsesWith(Overflow);
AddI->replaceAllUsesWith(UAdd);
CI->eraseFromParent();
AddI->eraseFromParent();
return true;
}
/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool OptimizeCmpExpression(CmpInst *CI) {
static bool SinkCmpExpression(CmpInst *CI) {
BasicBlock *DefBB = CI->getParent();
/// InsertedCmp - Only insert a cmp in each block once.
@ -802,6 +849,16 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
return MadeChange;
}
static bool OptimizeCmpExpression(CmpInst *CI) {
if (SinkCmpExpression(CI))
return true;
if (CombineUAddWithOverflow(CI))
return true;
return false;
}
/// isExtractBitsCandidateUse - Check if the candidates could
/// be combined with shift instruction, which includes:
/// 1. Truncate instruction

View File

@ -2109,35 +2109,6 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
InstCombiner &IC) {
// Don't bother doing this transformation for pointers, don't do it for
// vectors.
if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
// If the add is a constant expr, then we don't bother transforming it.
Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
if (!OrigAdd) return nullptr;
Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
// Put the new code above the original add, in case there are any uses of the
// add between the add and the compare.
InstCombiner::BuilderTy *Builder = IC.Builder;
Builder->SetInsertPoint(OrigAdd);
Module *M = I.getParent()->getParent()->getParent();
Type *Ty = LHS->getType();
Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
Value *Add = Builder->CreateExtractValue(Call, 0);
IC.ReplaceInstUsesWith(*OrigAdd, Add);
// The original icmp gets replaced with the overflow value.
return ExtractValueInst::Create(Call, 1, "uadd.overflow");
}
bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
Value *RHS, Instruction &OrigI,
Value *&Result, Constant *&Overflow) {
@ -3539,21 +3510,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
}
// (a+b) <u a --> llvm.uadd.with.overflow.
// (a+b) <u b --> llvm.uadd.with.overflow.
if (I.getPredicate() == ICmpInst::ICMP_ULT &&
match(Op0, m_Add(m_Value(A), m_Value(B))) &&
(Op1 == A || Op1 == B))
if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
return R;
// a >u (a+b) --> llvm.uadd.with.overflow.
// b >u (a+b) --> llvm.uadd.with.overflow.
if (I.getPredicate() == ICmpInst::ICMP_UGT &&
match(Op1, m_Add(m_Value(A), m_Value(B))) &&
(Op0 == A || Op0 == B))
if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
return R;
Instruction *AddI = nullptr;
if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
m_Instruction(AddI))) &&
isa<IntegerType>(A->getType())) {
Value *Result;
Constant *Overflow;
if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
Overflow)) {
ReplaceInstUsesWith(*AddI, Result);
return ReplaceInstUsesWith(I, Overflow);
}
}
// (zext a) * (zext b) --> llvm.umul.with.overflow.
if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {

View File

@ -4,9 +4,10 @@
define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
entry:
; CHECK-LABEL: test1:
; CHECK: cmpl %ecx, %eax
; CHECK-NOT: addl
; CHECK: adcl $0, %eax
; CHECK: movl
; CHECK-NEXT: addl
; CHECK-NEXT: adcl $0
; CHECK-NEXT: ret
%add4 = add i32 %x, %sum
%cmp = icmp ult i32 %add4, %x
%inc = zext i1 %cmp to i32
@ -14,33 +15,15 @@ entry:
ret i32 %z.0
}
; Instcombine transforms test1 into test2:
; CHECK-LABEL: test2:
; CHECK: movl
; CHECK-NEXT: addl
; CHECK-NEXT: adcl $0
; CHECK-NEXT: ret
define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
entry:
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
%0 = extractvalue { i32, i1 } %uadd, 0
%cmp = extractvalue { i32, i1 } %uadd, 1
%inc = zext i1 %cmp to i32
%z.0 = add i32 %0, %inc
ret i32 %z.0
}
; <rdar://problem/12579915>
define i32 @test3(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
define i32 @test2(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
entry:
%cmp = icmp ugt i32 %x, %y
%dec = sext i1 %cmp to i32
%dec.res = add nsw i32 %dec, %res
ret i32 %dec.res
; CHECK-LABEL: test3:
; CHECK-LABEL: test2:
; CHECK: cmpl
; CHECK: sbbl
; CHECK: ret
}
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone

View File

@ -0,0 +1,74 @@
; RUN: opt -codegenprepare -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
; CHECK-LABEL: @test1(
; CHECK: llvm.uadd.with.overflow
; CHECK: ret i64
define i64 @test1(i64 %a, i64 %b) nounwind ssp {
entry:
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %a
%Q = select i1 %cmp, i64 %b, i64 42
ret i64 %Q
}
; CHECK-LABEL: @test2(
; CHECK: llvm.uadd.with.overflow
; CHECK: ret i64
define i64 @test2(i64 %a, i64 %b) nounwind ssp {
entry:
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %b
%Q = select i1 %cmp, i64 %b, i64 42
ret i64 %Q
}
; CHECK-LABEL: @test3(
; CHECK: llvm.uadd.with.overflow
; CHECK: ret i64
define i64 @test3(i64 %a, i64 %b) nounwind ssp {
entry:
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
%Q = select i1 %cmp, i64 %b, i64 42
ret i64 %Q
}
; CHECK-LABEL: @test4(
; CHECK: llvm.uadd.with.overflow
; CHECK: extractvalue
; CHECK: extractvalue
; CHECK: select
define i64 @test4(i64 %a, i64 %b, i1 %c) nounwind ssp {
entry:
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
br i1 %c, label %next, label %exit
next:
%Q = select i1 %cmp, i64 %b, i64 42
ret i64 %Q
exit:
ret i64 0
}
; CHECK-LABEL: @test5(
; CHECK-NOT: llvm.uadd.with.overflow
; CHECK: next
define i64 @test5(i64 %a, i64 %b, i64* %ptr, i1 %c) nounwind ssp {
entry:
%add = add i64 %b, %a
store i64 %add, i64* %ptr
%cmp = icmp ugt i64 %b, %add
br i1 %c, label %next, label %exit
next:
%Q = select i1 %cmp, i64 %b, i64 42
ret i64 %Q
exit:
ret i64 0
}

View File

@ -97,39 +97,6 @@ if.end: ; preds = %entry
; CHECK: ret i8
}
; CHECK-LABEL: @test5(
; CHECK: llvm.uadd.with.overflow
; CHECK: ret i64
define i64 @test5(i64 %a, i64 %b) nounwind ssp {
entry:
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %a
%Q = select i1 %cmp, i64 %b, i64 42
ret i64 %Q
}
; CHECK-LABEL: @test6(
; CHECK: llvm.uadd.with.overflow
; CHECK: ret i64
define i64 @test6(i64 %a, i64 %b) nounwind ssp {
entry:
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %b
%Q = select i1 %cmp, i64 %b, i64 42
ret i64 %Q
}
; CHECK-LABEL: @test7(
; CHECK: llvm.uadd.with.overflow
; CHECK: ret i64
define i64 @test7(i64 %a, i64 %b) nounwind ssp {
entry:
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
%Q = select i1 %cmp, i64 %b, i64 42
ret i64 %Q
}
; CHECK-LABEL: @test8(
; PR11438
; This is @test1, but the operands are not sign-extended. Make sure