Add support to ValueTracking for determining that a pointer is non-null

by virtue of inbounds GEPs that preclude a null pointer.

This is a very common pattern in the code generated by std::vector and
other standard library routines which use allocators that test for null
pervasively. This is one step closer to teaching Clang+LLVM to be able
to produce an empty function for:

  void f() {
    std::vector<int> v;
    v.push_back(1);
    v.push_back(2);
    v.push_back(3);
    v.push_back(4);
  }

Which is related to getting them to completely fold SmallVector
push_back sequences into constants when inlining and other optimizations
make that a possibility.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169573 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chandler Carruth 2012-12-07 02:08:58 +00:00
parent b02ed5b8ea
commit 70d3bebc8b
2 changed files with 113 additions and 0 deletions

View File

@ -862,6 +862,72 @@ bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero,
return false;
}
/// \brief Try to prove that the pointer computed by a GEP is never null.
///
/// The proof relies on the GEP being inbounds in address space zero, combined
/// with either a base pointer that is known non-null or an index that is known
/// to contribute a non-zero offset.
///
/// Vector GEPs are not handled by this routine yet.
static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
                              unsigned Depth) {
  // Both the inbounds guarantee and address space zero are required for any
  // of the reasoning below.
  if (!GEP->isInBounds())
    return false;
  if (GEP->getPointerAddressSpace() != 0)
    return false;

  // FIXME: Support vector-GEPs.
  assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");

  // An inbounds GEP in address space zero cannot walk from a non-null base to
  // the null address.
  Value *Base = GEP->getPointerOperand();
  if (isKnownNonZero(Base, DL, Depth))
    return true;

  // Everything below needs type sizes and struct layouts from DataLayout.
  if (!DL)
    return false;

  // Visit each index in turn. As soon as one is shown to add a non-zero
  // offset we are done: reaching null would then violate the inbounds
  // contract within address space zero.
  gep_type_iterator Step = gep_type_begin(GEP);
  gep_type_iterator StepEnd = gep_type_end(GEP);
  for (; Step != StepEnd; ++Step) {
    Value *Index = Step.getOperand();

    // Struct fields are always selected by a constant index, so the field
    // offset can be read straight from the struct layout.
    if (StructType *STy = dyn_cast<StructType>(*Step)) {
      unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
      const StructLayout *Layout = DL->getStructLayout(STy);
      uint64_t FieldOffset = Layout->getElementOffset(FieldNo);
      if (FieldOffset != 0)
        return true;
      continue;
    }

    // Indexing over a zero-sized type tells us nothing, whatever the index.
    if (DL->getTypeAllocSize(Step.getIndexedType()) == 0)
      continue;

    // Constant indices are answered directly, both for speed and so that an
    // all-constant GEP never consumes any of the Depth budget.
    if (ConstantInt *ConstIndex = dyn_cast<ConstantInt>(Index)) {
      if (ConstIndex->isZero())
        continue;
      return true;
    }

    // Charge one unit of Depth per non-constant index. isKnownNonZero bumps
    // its own copy, but that bump is lost when the call returns, and we do
    // not want 10k recursive queries just because there are 10k operands.
    // When the budget is exhausted we skip the query instead of bailing out,
    // so that later constant indices are still examined.
    const unsigned DepthBefore = Depth;
    ++Depth;
    if (DepthBefore >= MaxDepth)
      continue;

    if (isKnownNonZero(Index, DL, Depth))
      return true;
  }

  return false;
}
/// isKnownNonZero - Return true if the given value is known to be non-zero
/// when defined. For vectors return true if every element is known to be
/// non-zero when defined. Supports values with integer or pointer type and
@ -881,6 +947,13 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
if (Depth++ >= MaxDepth)
return false;
// Check for pointer simplifications.
if (V->getType()->isPointerTy()) {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (isGEPKnownNonNull(GEP, TD, Depth))
return true;
}
unsigned BitWidth = getBitWidth(V->getType(), TD);
// X | Y != 0 if X != 0 or Y != 0.

View File

@ -165,6 +165,46 @@ entry:
ret i1 %cmp
}
define i1 @gep13(i8* %ptr) {
; CHECK: @gep13
; The GEP is inbounds with a non-zero constant index, which is enough to prove
; its result is non-null, so the comparison against null folds to false.
  %next = getelementptr inbounds i8* %ptr, i32 1
  %isnull = icmp eq i8* %next, null
  ret i1 %isnull
; CHECK-NEXT: ret i1 false
}
define i1 @gep14({ {}, i8 }* %ptr) {
; CHECK: @gep14
; This must not be simplified: the final index of one looks like a step, but
; in this GEP it does not actually move the pointer.
  %field = getelementptr inbounds { {}, i8 }* %ptr, i32 0, i32 1
  %isnull = icmp eq i8* %field, null
  ret i1 %isnull
; CHECK-NOT: ret i1 false
}
define i1 @gep15({ {}, [4 x {i8, i8}]}* %ptr, i32 %y) {
; CHECK: @gep15
; The result is provably non-null despite the caller-supplied index %y: a
; null result would necessarily violate inbounds on one side or the other.
  %elt = getelementptr inbounds { {}, [4 x {i8, i8}]}* %ptr, i32 0, i32 1, i32 %y, i32 1
  %isnull = icmp eq i8* %elt, null
  ret i1 %isnull
; CHECK-NEXT: ret i1 false
}
define i1 @gep16(i8* %ptr, i32 %a) {
; CHECK: @gep16
; The index %nonzero has an unknown value, but or-ing with 1 makes it known
; to be non-zero; together with inbounds that proves the GEP is non-null.
  %nonzero = or i32 %a, 1
  %elt = getelementptr inbounds i8* %ptr, i32 %nonzero
  %isnull = icmp eq i8* %elt, null
  ret i1 %isnull
; CHECK-NEXT: ret i1 false
}
define i1 @zext(i32 %x) {
; CHECK: @zext
%e1 = zext i32 %x to i64