[InstCombine][CodeGenPrep] Create llvm.uadd.with.overflow in CGP.

Summary: This change moves creating calls to `llvm.uadd.with.overflow` from InstCombine to CodeGenPrep. Combining overflow check patterns into calls to the said intrinsic in InstCombine inhibits optimization because it introduces an intrinsic call that not all other transforms and analyses understand. Depends on D8888. Reviewers: majnemer, atrick Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D8889 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234638 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-09 01:38:03 +00:00 · 2015-04-10 21:07:09 +00:00 · 2015-04-10 21:07:09 +00:00 · 8aca90e5b6
commit 8aca90e5b6
parent 79db0a6fd9
6 changed files with 200 additions and 102 deletions
--- a/include/llvm/IR/PatternMatch.h
+++ b/include/llvm/IR/PatternMatch.h
@ -295,6 +295,9 @@ template <typename Class> struct bind_ty {
 /// \brief Match a value, capturing it if we match.
 inline bind_ty<Value> m_Value(Value *&V) { return V; }

+/// \brief Match an instruction, capturing it if we match.
+inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
+
 /// \brief Match a binary operator, capturing it if we match.
 inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }

@ -1103,6 +1106,52 @@ m_UnordFMax(const LHS &L, const RHS &R) {
  return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
 }

+//===----------------------------------------------------------------------===//
+// Matchers for overflow check patterns: e.g. (a + b) u< a
+//
+
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+struct UAddWithOverflow_match {
+  LHS_t L;
+  RHS_t R;
+  Sum_t S;
+
+  UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
+      : L(L), R(R), S(S) {}
+
+  template <typename OpTy> bool match(OpTy *V) {
+    Value *ICmpLHS, *ICmpRHS;
+    ICmpInst::Predicate Pred;
+    if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
+      return false;
+
+    Value *AddLHS, *AddRHS;
+    auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
+
+    // (a + b) u< a, (a + b) u< b
+    if (Pred == ICmpInst::ICMP_ULT)
+      if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
+        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
+
+    // a >u (a + b), b >u (a + b)
+    if (Pred == ICmpInst::ICMP_UGT)
+      if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
+        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
+
+    return false;
+  }
+};
+
+/// \brief Match an icmp instruction checking for unsigned overflow on addition.
+///
+/// S is matched to the addition whose result is being checked for overflow, and
+/// L and R are matched to the LHS and RHS of S.
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
+m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
+  return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
+}
+
 /// \brief Match an 'unordered' floating point minimum function.
 /// Floating point has one special value 'NaN'. Therefore, there is no total
 /// order. However, if we can ignore the 'NaN' value (for example, because of a
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@ -747,13 +747,60 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  return SinkCast(CI);
 }

-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// CombineUAddWithOverflow - try to combine CI into a call to the
+/// llvm.uadd.with.overflow intrinsic if possible.
+///
+/// Return true if any changes were made.
+static bool CombineUAddWithOverflow(CmpInst *CI) {
+  Value *A, *B;
+  Instruction *AddI;
+  if (!match(CI,
+             m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+    return false;
+
+  Type *Ty = AddI->getType();
+  if (!isa<IntegerType>(Ty))
+    return false;
+
+  // We don't want to move around uses of condition values this late, so we we
+  // check if it is legal to create the call to the intrinsic in the basic
+  // block containing the icmp:
+
+  if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+    return false;
+
+#ifndef NDEBUG
+  // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
+  // for now:
+  if (AddI->hasOneUse())
+    assert(*AddI->user_begin() == CI && "expected!");
+#endif
+
+  Module *M = CI->getParent()->getParent()->getParent();
+  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+
+  auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+
+  auto *UAddWithOverflow =
+      CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+  auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+  auto *Overflow =
+      ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+
+  CI->replaceAllUsesWith(Overflow);
+  AddI->replaceAllUsesWith(UAdd);
+  CI->eraseFromParent();
+  AddI->eraseFromParent();
+  return true;
+}
+
+/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
 /// the number of virtual registers that must be created and coalesced.  This is
 /// a clear win except on targets with multiple condition code registers
 ///  (PowerPC), where it might lose; some adjustment may be wanted there.
 ///
 /// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmp - Only insert a cmp in each block once.
@ -802,6 +849,16 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
  return MadeChange;
 }

+static bool OptimizeCmpExpression(CmpInst *CI) {
+  if (SinkCmpExpression(CI))
+    return true;
+
+  if (CombineUAddWithOverflow(CI))
+    return true;
+
+  return false;
+}
+
 /// isExtractBitsCandidateUse - Check if the candidates could
 /// be combined with shift instruction, which includes:
 /// 1. Truncate instruction
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@ -2109,35 +2109,6 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
  return ExtractValueInst::Create(Call, 1, "sadd.overflow");
 }

-static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
-                                     InstCombiner &IC) {
-  // Don't bother doing this transformation for pointers, don't do it for
-  // vectors.
-  if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
-
-  // If the add is a constant expr, then we don't bother transforming it.
-  Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
-  if (!OrigAdd) return nullptr;
-
-  Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
-
-  // Put the new code above the original add, in case there are any uses of the
-  // add between the add and the compare.
-  InstCombiner::BuilderTy *Builder = IC.Builder;
-  Builder->SetInsertPoint(OrigAdd);
-
-  Module *M = I.getParent()->getParent()->getParent();
-  Type *Ty = LHS->getType();
-  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
-  CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
-  Value *Add = Builder->CreateExtractValue(Call, 0);
-
-  IC.ReplaceInstUsesWith(*OrigAdd, Add);
-
-  // The original icmp gets replaced with the overflow value.
-  return ExtractValueInst::Create(Call, 1, "uadd.overflow");
-}
-
 bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
                                         Value *RHS, Instruction &OrigI,
                                         Value *&Result, Constant *&Overflow) {
@ -3539,21 +3510,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
        return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
    }

-    // (a+b) <u a  --> llvm.uadd.with.overflow.
-    // (a+b) <u b  --> llvm.uadd.with.overflow.
-    if (I.getPredicate() == ICmpInst::ICMP_ULT &&
-        match(Op0, m_Add(m_Value(A), m_Value(B))) &&
-        (Op1 == A || Op1 == B))
-      if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
-        return R;
-
-    // a >u (a+b)  --> llvm.uadd.with.overflow.
-    // b >u (a+b)  --> llvm.uadd.with.overflow.
-    if (I.getPredicate() == ICmpInst::ICMP_UGT &&
-        match(Op1, m_Add(m_Value(A), m_Value(B))) &&
-        (Op0 == A || Op0 == B))
-      if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
-        return R;
+    Instruction *AddI = nullptr;
+    if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+                                     m_Instruction(AddI))) &&
+        isa<IntegerType>(A->getType())) {
+      Value *Result;
+      Constant *Overflow;
+      if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
+                                Overflow)) {
+        ReplaceInstUsesWith(*AddI, Result);
+        return ReplaceInstUsesWith(I, Overflow);
+      }
+    }

    // (zext a) * (zext b)  --> llvm.umul.with.overflow.
    if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
--- a/test/CodeGen/X86/add-of-carry.ll
+++ b/test/CodeGen/X86/add-of-carry.ll
@ -4,9 +4,10 @@
 define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
 entry:
 ; CHECK-LABEL: test1:
-; CHECK: cmpl %ecx, %eax 
-; CHECK-NOT: addl
-; CHECK: adcl $0, %eax
+; CHECK: movl
+; CHECK-NEXT: addl
+; CHECK-NEXT: adcl $0
+; CHECK-NEXT: ret
  %add4 = add i32 %x, %sum
  %cmp = icmp ult i32 %add4, %x
  %inc = zext i1 %cmp to i32
@ -14,33 +15,15 @@ entry:
  ret i32 %z.0
 }

-; Instcombine transforms test1 into test2:
-; CHECK-LABEL: test2:
-; CHECK: movl
-; CHECK-NEXT: addl
-; CHECK-NEXT: adcl $0
-; CHECK-NEXT: ret
-define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
-entry:
-  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
-  %0 = extractvalue { i32, i1 } %uadd, 0
-  %cmp = extractvalue { i32, i1 } %uadd, 1
-  %inc = zext i1 %cmp to i32
-  %z.0 = add i32 %0, %inc
-  ret i32 %z.0
-}
-
 ; <rdar://problem/12579915>
-define i32 @test3(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
+define i32 @test2(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
 entry:
  %cmp = icmp ugt i32 %x, %y
  %dec = sext i1 %cmp to i32
  %dec.res = add nsw i32 %dec, %res
  ret i32 %dec.res
-; CHECK-LABEL: test3:
+; CHECK-LABEL: test2:
 ; CHECK: cmpl
 ; CHECK: sbbl
 ; CHECK: ret
 }
-
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
--- a/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
+++ b/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
@ -0,0 +1,74 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK-LABEL: @test1(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test1(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %a
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test2(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test2(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %b
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test3(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test3(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test4(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: extractvalue
+; CHECK: extractvalue
+; CHECK: select
+define i64 @test4(i64 %a, i64 %b, i1 %c) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+ next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+ exit:
+  ret i64 0
+}
+
+; CHECK-LABEL: @test5(
+; CHECK-NOT: llvm.uadd.with.overflow
+; CHECK: next
+define i64 @test5(i64 %a, i64 %b, i64* %ptr, i1 %c) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  store i64 %add, i64* %ptr
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+ next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+ exit:
+  ret i64 0
+}
--- a/test/Transforms/InstCombine/overflow.ll
+++ b/test/Transforms/InstCombine/overflow.ll
@ -97,39 +97,6 @@ if.end:                                           ; preds = %entry
 ; CHECK: ret i8
 }

-; CHECK-LABEL: @test5(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test5(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ult i64 %add, %a
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
-; CHECK-LABEL: @test6(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test6(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ult i64 %add, %b
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
-; CHECK-LABEL: @test7(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test7(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ugt i64 %b, %add
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
 ; CHECK-LABEL: @test8(
 ; PR11438
 ; This is @test1, but the operands are not sign-extended.  Make sure