Add support to ValueTracking for determining that a pointer is non-null

by virtue of inbounds GEPs that preclude a null pointer. This is a very common pattern in the code generated by std::vector and other standard library routines which use allocators that test for null pervasively. This is one step closer to teaching Clang+LLVM to be able to produce an empty function for: `void f() { std::vector<int> v; v.push_back(1); v.push_back(2); v.push_back(3); v.push_back(4); }`. This is related to getting them to completely fold SmallVector push_back sequences into constants when inlining and other optimizations make that a possibility. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169573 91177308-0d34-0410-b5e6-96231b3b80d8
2024-12-27 13:30:05 +00:00 · 2012-12-07 02:08:58 +00:00 · 2012-12-07 02:08:58 +00:00 · 70d3bebc8b
commit 70d3bebc8b
parent b02ed5b8ea
2 changed files with 113 additions and 0 deletions
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@ -862,6 +862,72 @@ bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero,
  return false;
 }
 /// \brief Decide whether the pointer computed by a GEP is provably non-null.
 ///
 /// Only inbounds GEPs in address space zero are considered. Such a GEP is
 /// reported as non-null when either its base pointer is known non-zero or,
 /// given a DataLayout, at least one index is shown to contribute a non-zero
 /// offset.
 ///
 /// \param GEP   The GEP operator to inspect; must produce a plain pointer.
 /// \param DL    Optional target data layout; without it only the base
 ///              pointer can be examined.
 /// \param Depth Current recursion depth, compared against MaxDepth.
 /// \returns true only when the result is proven non-null; false means
 ///          "unknown".
 ///
 /// Currently this routine does not support vector GEPs.
 static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
                               unsigned Depth) {
  // The reasoning below leans on the inbounds contract in address space zero,
  // so anything else is rejected up front.
  if (!GEP->isInBounds())
    return false;
  if (GEP->getPointerAddressSpace() != 0)
    return false;

  // FIXME: Support vector-GEPs.
  assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");

  // An inbounds GEP in address space zero cannot step from a non-null base
  // to the null address.
  Value *BasePtr = GEP->getPointerOperand();
  if (isKnownNonZero(BasePtr, DL, Depth))
    return true;

  // Everything that follows needs type sizes and struct layouts.
  if (!DL)
    return false;

  // Look for any single index that moves the pointer by a non-zero amount.
  // One such index suffices: ending up at null would then inherently break
  // the inbounds contract within address space zero.
  gep_type_iterator TypeIt = gep_type_begin(GEP);
  gep_type_iterator TypeEnd = gep_type_end(GEP);
  for (; TypeIt != TypeEnd; ++TypeIt) {
    Value *Index = TypeIt.getOperand();

    // Struct fields are always selected by a constant, so the byte offset can
    // be read straight out of the struct layout.
    if (StructType *StructTy = dyn_cast<StructType>(*TypeIt)) {
      unsigned FieldIdx = cast<ConstantInt>(Index)->getZExtValue();
      const StructLayout *Layout = DL->getStructLayout(StructTy);
      if (Layout->getElementOffset(FieldIdx) > 0)
        return true;
      continue;
    }

    // Indexing over a zero-sized type never moves the pointer, whatever the
    // index value is.
    if (DL->getTypeAllocSize(TypeIt.getIndexedType()) == 0)
      continue;

    // Constant indices are resolved here, both for speed and so that an
    // all-constant GEP never consumes any of the depth budget.
    if (ConstantInt *ConstIdx = dyn_cast<ConstantInt>(Index)) {
      if (ConstIdx->isZero())
        continue;
      return true;
    }

    // Each variable index examined costs one unit of depth that persists
    // across iterations (the callee's own increment is lost on return), so a
    // GEP with very many operands cannot trigger that many recursive queries.
    // When the budget is exhausted we skip rather than bail out, so that later
    // constant indices are still handled regardless of depth.
    const bool BudgetExhausted = Depth >= MaxDepth;
    ++Depth;
    if (BudgetExhausted)
      continue;

    if (isKnownNonZero(Index, DL, Depth))
      return true;
  }

  return false;
 }
 /// isKnownNonZero - Return true if the given value is known to be non-zero
 /// when defined.  For vectors return true if every element is known to be
 /// non-zero when defined.  Supports values with integer or pointer type and
@ -881,6 +947,13 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
  if (Depth++ >= MaxDepth)
    return false;
  // Check for pointer simplifications.
  if (V->getType()->isPointerTy()) {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
      if (isGEPKnownNonNull(GEP, TD, Depth))
        return true;
  }
  unsigned BitWidth = getBitWidth(V->getType(), TD);
  // X | Y != 0 if X != 0 or Y != 0.
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@ -165,6 +165,46 @@ entry:
  ret i1 %cmp
 }
 define i1 @gep13(i8* %ptr) {
 ; CHECK: @gep13
 ; We can prove this GEP is non-null because it is inbounds and steps by a
 ; non-zero constant index (1) over i8 elements, so the comparison against
 ; null below is expected to fold to false.
  %x = getelementptr inbounds i8* %ptr, i32 1
  %cmp = icmp eq i8* %x, null
  ret i1 %cmp
 ; CHECK-NEXT: ret i1 false
 }
 define i1 @gep14({ {}, i8 }* %ptr) {
 ; CHECK: @gep14
 ; We can't simplify this because the field index of one in the GEP actually
 ; doesn't move the pointer: the only field before it is the empty struct {},
 ; so field 1 presumably sits at byte offset zero. The check at the end only
 ; requires that the comparison is not folded to false.
  %x = getelementptr inbounds { {}, i8 }* %ptr, i32 0, i32 1
  %cmp = icmp eq i8* %x, null
  ret i1 %cmp
 ; CHECK-NOT: ret i1 false
 }
 define i1 @gep15({ {}, [4 x {i8, i8}]}* %ptr, i32 %y) {
 ; CHECK: @gep15
 ; We can prove this GEP is non-null even though there is a user value (%y)
 ; among the indices, as we would necessarily violate inbounds on one side or
 ; the other. The trailing constant index selects the second i8 of {i8, i8},
 ; which presumably lies at a non-zero offset, so the result does not depend
 ; on knowing anything about %y.
  %x = getelementptr inbounds { {}, [4 x {i8, i8}]}* %ptr, i32 0, i32 1, i32 %y, i32 1
  %cmp = icmp eq i8* %x, null
  ret i1 %cmp
 ; CHECK-NEXT: ret i1 false
 }
 define i1 @gep16(i8* %ptr, i32 %a) {
 ; CHECK: @gep16
 ; We can prove this GEP is non-null because it is inbounds and because we know
 ; %b is non-zero even though we don't know its value: the `or` with 1 forces
 ; the low bit of %b on for every possible %a.
  %b = or i32 %a, 1
  %x = getelementptr inbounds i8* %ptr, i32 %b
  %cmp = icmp eq i8* %x, null
  ret i1 %cmp
 ; CHECK-NEXT: ret i1 false
 }
 define i1 @zext(i32 %x) {
 ; CHECK: @zext
  %e1 = zext i32 %x to i64