Infer known bits from dominating conditions

This patch adds limited support in ValueTracking for inferring known bits of a value from conditional expressions which must be true to reach the instruction we're trying to optimize. At this time, the feature is off by default. Once landed, I'm hoping for feedback from others on both profitability and compile time impact.

Forms of conditional value propagation have been tried in LLVM before and have failed due to compile time problems.  In an attempt to side step that, this patch only considers conditions where the edge leaving the branch dominates the context instruction. It does not attempt full dataflow.  Even with that restriction, it handles many interesting cases:
 * Early exits from functions
 * Early exits from loops (for context instructions in the loop and after the check)
 * Conditions which control entry into loops, including multi-version loops (such as those produced during vectorization, IRCE, loop unswitch, etc..)

Possible applications include optimizing using information provided by constructs such as: preconditions, assumptions, null checks, & range checks.

This patch implements two approaches to the problem that need further benchmarking.  Approach 1 is to directly walk the dominator tree looking for interesting conditions.  Approach 2 is to inspect other uses of the value being queried for interesting comparisons.  From initial benchmarking, it appears that Approach 2 is faster than Approach 1, but this needs to be further validated.  

Differential Revision: http://reviews.llvm.org/D7708



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231879 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Philip Reames 2015-03-10 22:43:20 +00:00
parent 57849e3bb4
commit 7c7b72a066
2 changed files with 364 additions and 0 deletions

View File

@ -39,6 +39,34 @@ using namespace llvm::PatternMatch;
const unsigned MaxDepth = 6; const unsigned MaxDepth = 6;
/// Enable an experimental feature to leverage information about dominating
/// conditions to compute known bits. The individual options below control how
/// hard we search. The defaults are choosen to be fairly aggressive. If you
/// run into compile time problems when testing, scale them back and report
/// your findings.
static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
cl::Hidden, cl::init(false));
// This is expensive, so we only do it for the top level query value.
// (TODO: evaluate cost vs profit, consider higher thresholds)
static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
cl::Hidden, cl::init(1));
/// How many dominating blocks should be scanned looking for dominating
/// conditions?
static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
cl::Hidden,
cl::init(20000));
// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(2000));
// If true, don't consider only compares whose only use is a branch.
static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
cl::Hidden, cl::init(false));
/// Returns the bitwidth of the given scalar or pointer type (if unknown returns /// Returns the bitwidth of the given scalar or pointer type (if unknown returns
/// 0). For vector types, returns the element type's bitwidth. /// 0). For vector types, returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@ -470,6 +498,179 @@ m_c_Xor(const LHS &L, const RHS &R) {
return m_CombineOr(m_Xor(L, R), m_Xor(R, L)); return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
} }
/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is
/// true (at the context instruction.) This is mostly a utility function for
/// the prototype dominating conditions reasoning below.
static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp,
APInt &KnownZero,
APInt &KnownOne,
const DataLayout &DL,
unsigned Depth, const Query &Q) {
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);
// TODO: We could potentially be more aggressive here. This would be worth
// evaluating. If we can, explore commoning this code with the assume
// handling logic.
if (LHS != V && RHS != V)
return;
const unsigned BitWidth = KnownZero.getBitWidth();
switch (Cmp->getPredicate()) {
default:
// We know nothing from this condition
break;
// TODO: implement unsigned bound from below (known one bits)
// TODO: common condition check implementations with assumes
// TODO: implement other patterns from assume (e.g. V & B == A)
case ICmpInst::ICMP_SGT:
if (LHS == V) {
APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) {
// We know that the sign bit is zero.
KnownZero |= APInt::getSignBit(BitWidth);
}
}
break;
case ICmpInst::ICMP_EQ:
if (LHS == V)
computeKnownBits(RHS, KnownZero, KnownOne, DL, Depth + 1, Q);
else if (RHS == V)
computeKnownBits(LHS, KnownZero, KnownOne, DL, Depth + 1, Q);
else
llvm_unreachable("missing use?");
break;
case ICmpInst::ICMP_ULE:
if (LHS == V) {
APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
// The known zero bits carry over
unsigned SignBits = KnownZeroTemp.countLeadingOnes();
KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
}
break;
case ICmpInst::ICMP_ULT:
if (LHS == V) {
APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, DL, Depth + 1, Q);
// Whatever high bits in rhs are zero are known to be zero (if rhs is a
// power of 2, then one more).
unsigned SignBits = KnownZeroTemp.countLeadingOnes();
if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp), DL))
SignBits++;
KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
}
break;
};
}
/// Compute known bits in 'V' from conditions which are known to be true along
/// all paths leading to the context instruction. In particular, look for
/// cases where one branch of an interesting condition dominates the context
/// instruction. This does not do general dataflow.
/// NOTE: This code is EXPERIMENTAL and currently off by default.
static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
APInt &KnownOne,
const DataLayout &DL,
unsigned Depth,
const Query &Q) {
// Need both the dominator tree and the query location to do anything useful
if (!Q.DT || !Q.CxtI)
return;
Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
// Avoid useless work
if (auto VI = dyn_cast<Instruction>(V))
if (VI->getParent() == Cxt->getParent())
return;
// Note: We currently implement two options. It's not clear which of these
// will survive long term, we need data for that.
// Option 1 - Try walking the dominator tree looking for conditions which
// might apply. This works well for local conditions (loop guards, etc..),
// but not as well for things far from the context instruction (presuming a
// low max blocks explored). If we can set an high enough limit, this would
// be all we need.
// Option 2 - We restrict out search to those conditions which are uses of
// the value we're interested in. This is independent of dom structure,
// but is slightly less powerful without looking through lots of use chains.
// It does handle conditions far from the context instruction (e.g. early
// function exits on entry) really well though.
// Option 1 - Search the dom tree
unsigned NumBlocksExplored = 0;
BasicBlock *Current = Cxt->getParent();
while (true) {
// Stop searching if we've gone too far up the chain
if (NumBlocksExplored >= DomConditionsMaxDomBlocks)
break;
NumBlocksExplored++;
if (!Q.DT->getNode(Current)->getIDom())
break;
Current = Q.DT->getNode(Current)->getIDom()->getBlock();
if (!Current)
// found function entry
break;
BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator());
if (!BI || BI->isUnconditional())
continue;
ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
if (!Cmp)
continue;
// We're looking for conditions that are guaranteed to hold at the context
// instruction. Finding a condition where one path dominates the context
// isn't enough because both the true and false cases could merge before
// the context instruction we're actually interested in. Instead, we need
// to ensure that the taken *edge* dominates the context instruction.
BasicBlock *BB0 = BI->getSuccessor(0);
BasicBlockEdge Edge(BI->getParent(), BB0);
if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
continue;
computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
Q);
}
// Option 2 - Search the other uses of V
unsigned NumUsesExplored = 0;
for (auto U : V->users()) {
// Avoid massive lists
if (NumUsesExplored >= DomConditionsMaxUses)
break;
NumUsesExplored++;
// Consider only compare instructions uniquely controlling a branch
ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
if (!Cmp)
continue;
if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
continue;
for (auto *CmpU : Cmp->users()) {
BranchInst *BI = dyn_cast<BranchInst>(CmpU);
if (!BI || BI->isUnconditional())
continue;
// We're looking for conditions that are guaranteed to hold at the
// context instruction. Finding a condition where one path dominates
// the context isn't enough because both the true and false cases could
// merge before the context instruction we're actually interested in.
// Instead, we need to ensure that the taken *edge* dominates the context
// instruction.
BasicBlock *BB0 = BI->getSuccessor(0);
BasicBlockEdge Edge(BI->getParent(), BB0);
if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
continue;
computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, DL, Depth,
Q);
}
}
}
static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero, static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
APInt &KnownOne, const DataLayout &DL, APInt &KnownOne, const DataLayout &DL,
unsigned Depth, const Query &Q) { unsigned Depth, const Query &Q) {
@ -824,6 +1025,11 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Don't give up yet... there might be an assumption that provides more // Don't give up yet... there might be an assumption that provides more
// information... // information...
computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
// Or a dominating condition for that matter
if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL,
Depth, Q);
return; return;
} }
@ -846,6 +1052,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Check whether a nearby assume intrinsic can determine some known bits. // Check whether a nearby assume intrinsic can determine some known bits.
computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q);
// Check whether there's a dominating condition which implies something about
// this value at the given context.
if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, Depth,
Q);
Operator *I = dyn_cast<Operator>(V); Operator *I = dyn_cast<Operator>(V);
if (!I) return; if (!I) return;

View File

@ -0,0 +1,152 @@
; RUN: opt -instcombine -value-tracking-dom-conditions=1 -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
define i1 @test_cmp_ult(i64 %A) {
; CHECK-LABEL: @test_cmp_ult
entry:
%cmp = icmp ult i64 %A, 64
br i1 %cmp, label %taken, label %untaken
taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: ret i1 false
%cmp2 = icmp ugt i64 %A, 64
ret i1 %cmp2
untaken:
ret i1 true
}
define i1 @test_cmp_ule(i64 %A) {
; CHECK-LABEL: @test_cmp_ule
entry:
%cmp = icmp ule i64 %A, 64
br i1 %cmp, label %taken, label %untaken
taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: ret i1 false
%cmp2 = icmp ugt i64 %A, 128
ret i1 %cmp2
untaken:
ret i1 true
}
define i1 @test_cmp_sgt(i32 %A) {
; CHECK-LABEL: @test_cmp_sgt
entry:
%cmp = icmp sgt i32 %A, 10
br i1 %cmp, label %taken, label %untaken
taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: ret i1 true
%cmp2 = icmp sgt i32 %A, -1
ret i1 %cmp2
untaken:
ret i1 true
}
define i64 @test_add_zero_bits(i64 %A) {
; CHECK-LABEL: @test_add_zero_bits
entry:
%cmp = icmp eq i64 %A, 2
br i1 %cmp, label %taken, label %untaken
taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: ret i64 3
%add = add i64 %A, 1
ret i64 %add
untaken:
ret i64 %A
}
define i64 @test_add_nsw(i64 %A) {
; CHECK-LABEL: @test_add_nsw
entry:
%cmp = icmp ult i64 %A, 20
br i1 %cmp, label %taken, label %untaken
taken:
; CHECK-LABEL: taken:
; CHECK-NEXT: %add = add nuw nsw i64 %A, 1
; CHECK-NEXT: ret i64 %add
%add = add i64 %A, 1
ret i64 %add
untaken:
ret i64 %A
}
; After sinking the instructions into the if block, check that we
; can simplify some of them using dominating conditions.
define i32 @test_add_zero_bits_sink(i32 %x) nounwind ssp {
; CHECK-LABEL: @test_add_zero_bits_sink(
; CHECK-NOT: sdiv i32
entry:
%a = add nsw i32 %x, 16
%b = sdiv i32 %a, %x
%cmp = icmp ult i32 %x, 7
br i1 %cmp, label %bb1, label %bb2
bb1:
; CHECK-LABEL: bb1:
; CHECK-NEXT: or i32 %x, 16
; CHECK-NEXT: udiv i32
ret i32 %b
bb2:
ret i32 %x
}
; A condition in the same block gives no information
define i32 @test_neg1(i32 %x) nounwind ssp {
; CHECK-LABEL: @test_neg1
; CHECK: add
; CHECK: sdiv
; CHECK: icmp
; CHECK: select
entry:
%a = add nsw i32 %x, 16
%b = sdiv i32 %a, %x
%cmp = icmp ult i32 %x, 7
%ret = select i1 %cmp, i32 %a, i32 %b
ret i32 %ret
}
; A non-dominating edge gives no information
define i32 @test_neg2(i32 %x) {
; CHECK-LABEL: @test_neg2
entry:
%cmp = icmp ult i32 %x, 7
br i1 %cmp, label %bb1, label %merge
bb1:
br label %merge
merge:
; CHECK-LABEL: merge:
; CHECK: icmp
; CHECK: select
%cmp2 = icmp ult i32 %x, 7
%ret = select i1 %cmp2, i32 %x, i32 0
ret i32 %ret
}
; A unconditional branch expressed as a condition one gives no
; information (and shouldn't trip any asserts.)
define i32 @test_neg3(i32 %x) {
; CHECK-LABEL: @test_neg3
entry:
%cmp = icmp ult i32 %x, 7
br i1 %cmp, label %merge, label %merge
merge:
; CHECK-LABEL: merge:
; CHECK: icmp
; CHECK: select
%cmp2 = icmp ult i32 %x, 7
%ret = select i1 %cmp2, i32 %x, i32 0
ret i32 %ret
}
declare i32 @bar()