Fix a really nasty SROA bug with how we handled out-of-bounds memcpy
intrinsics.

Reported on the list by Evan with a couple of attempts to fix, but it
took a while to dig down to the root cause. There are two overlapping
bugs here, both centering around the circumstance of discovering
a memcpy operand which is known to be completely outside the bounds of
the alloca.
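
For illustration, the triggering pattern boils down to a memcpy where one
pointer comes from a GEP that starts at or past the end of the alloca, along
the lines of the regression test added at the end of this change (a sketch;
the function and value names here are illustrative):

  define void @oob_copy(i32 %x) {
  entry:
    %a = alloca i32, align 4
    store i32 %x, i32* %a, align 4
    ; %gep1 points one i32 past the start of %a, i.e. exactly at its end, so
    ; the 4-byte destination of the memcpy is entirely out of bounds of %a.
    %gep1 = getelementptr inbounds i32* %a, i32 1
    %gep0 = getelementptr inbounds i32* %a, i32 0
    %cast1 = bitcast i32* %gep1 to i8*
    %cast0 = bitcast i32* %gep0 to i8*
    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i32 1, i1 false)
    ret void
  }

  declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)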

First, we need to kill the *other* side of the memcpy if it was added to
this alloca. Otherwise we'll factor it into our slicing and try to
rewrite it even though we know for a fact that it is dead. This is made
more tricky because we can visit the sides in either order. So we have
to both kill the other side and skip instructions marked as dead. The
latter really should be a benefit in every case, but here it is a matter of
correctness.

Second, we need to actually remove the *uses* of the alloca by the
memcpy when queuing it for later deletion. Otherwise it may still be
using the alloca when we go to promote it (if the rewrite re-uses the
existing alloca instruction). Do this by factoring out the use-clobbering
logic used for nixing a Phi argument and re-using it across the operands
of a to-be-deleted instruction.
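
In IR terms, using the example above (an illustrative sketch, not actual
pass output): until the queued memcpy is erased, it keeps %a alive through
its operand chain,

  ; still a transitive use of %a via %cast1 <- %gep1 (and %cast0 <- %gep0)
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i32 1, i1 false)

whereas clobbering each operand with undef (and garbage-collecting the now
trivially dead casts and GEPs) leaves only

  ; queued for deletion; no longer reaches %a
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 4, i32 1, i1 false)

so the alloca's use list is already empty by the time we go to promote it.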

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199590 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Chandler Carruth
Date:   2014-01-19 12:16:54 +00:00
Parent: d0fb7e49cc
Commit: e1a5243053

2 files changed, 64 insertions(+), 12 deletions(-)


@@ -461,14 +461,30 @@ private:
   void visitMemTransferInst(MemTransferInst &II) {
     ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
-    if ((Length && Length->getValue() == 0) ||
-        (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+    if (Length && Length->getValue() == 0)
       // Zero-length mem transfer intrinsics can be ignored entirely.
       return markAsDead(II);
 
+    // Because we can visit these intrinsics twice, also check to see if the
+    // first time marked this instruction as dead. If so, skip it.
+    if (VisitedDeadInsts.count(&II))
+      return;
+
     if (!IsOffsetKnown)
       return PI.setAborted(&II);
 
+    // This side of the transfer is completely out-of-bounds, and so we can
+    // nuke the entire transfer. However, we also need to nuke the other side
+    // if already added to our partitions.
+    // FIXME: Yet another place we really should bypass this when
+    // instrumenting for ASan.
+    if (!Offset.isNegative() && Offset.uge(AllocSize)) {
+      SmallDenseMap<Instruction *, unsigned>::iterator MTPI = MemTransferSliceMap.find(&II);
+      if (MTPI != MemTransferSliceMap.end())
+        S.Slices[MTPI->second].kill();
+      return markAsDead(II);
+    }
+
     uint64_t RawOffset = Offset.getLimitedValue();
     uint64_t Size = Length ? Length->getLimitedValue()
                            : AllocSize - RawOffset;
@@ -917,6 +933,7 @@ private:
                      ArrayRef<AllocaSlices::iterator> SplitUses);
   bool splitAlloca(AllocaInst &AI, AllocaSlices &S);
   bool runOnAlloca(AllocaInst &AI);
+  void clobberUse(Use &U);
   void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas);
   bool promoteAllocas(Function &F);
 };
@@ -2497,8 +2514,11 @@ private:
     // alloca that should be re-examined after rewriting this instruction.
     Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
     if (AllocaInst *AI
-          = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets()))
+          = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
+      assert(AI != &OldAI && AI != &NewAI &&
+             "Splittable transfers cannot reach the same alloca on both ends.");
       Pass.Worklist.insert(AI);
+    }
 
     if (EmitMemCpy) {
       Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
@@ -3328,6 +3348,21 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) {
   return Changed;
 }
 
+/// \brief Clobber a use with undef, deleting the used value if it becomes dead.
+void SROA::clobberUse(Use &U) {
+  Value *OldV = U;
+  // Replace the use with an undef value.
+  U = UndefValue::get(OldV->getType());
+
+  // Check for this making an instruction dead. We have to garbage collect
+  // all the dead instructions to ensure the uses of any alloca end up being
+  // minimal.
+  if (Instruction *OldI = dyn_cast<Instruction>(OldV))
+    if (isInstructionTriviallyDead(OldI)) {
+      DeadInsts.insert(OldI);
+    }
+}
+
 /// \brief Analyze an alloca for SROA.
 ///
 /// This analyzes the alloca to ensure we can reason about it, builds
@@ -3365,21 +3400,23 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
   for (AllocaSlices::dead_user_iterator DI = S.dead_user_begin(),
                                         DE = S.dead_user_end();
        DI != DE; ++DI) {
-    Changed = true;
+    // Free up everything used by this instruction.
+    for (User::op_iterator DOI = (*DI)->op_begin(), DOE = (*DI)->op_end();
+         DOI != DOE; ++DOI)
+      clobberUse(*DOI);
+
+    // Now replace the uses of this instruction.
     (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
+
+    // And mark it for deletion.
     DeadInsts.insert(*DI);
+    Changed = true;
   }
   for (AllocaSlices::dead_op_iterator DO = S.dead_op_begin(),
                                       DE = S.dead_op_end();
        DO != DE; ++DO) {
-    Value *OldV = **DO;
-    // Clobber the use with an undef value.
-    **DO = UndefValue::get(OldV->getType());
-    if (Instruction *OldI = dyn_cast<Instruction>(OldV))
-      if (isInstructionTriviallyDead(OldI)) {
-        Changed = true;
-        DeadInsts.insert(OldI);
-      }
+    clobberUse(**DO);
+    Changed = true;
   }
 
   // No slices to split. Leave the dead alloca for a later pass to clean up.


@@ -1356,3 +1356,18 @@ entry:
   %cond105.i.i = load float* %cond105.in.i.i, align 8
   ret void
 }
+
+define void @test23(i32 %x) {
+; CHECK-LABEL: @test23(
+; CHECK-NOT: alloca
+; CHECK: ret void
+entry:
+  %a = alloca i32, align 4
+  store i32 %x, i32* %a, align 4
+  %gep1 = getelementptr inbounds i32* %a, i32 1
+  %gep0 = getelementptr inbounds i32* %a, i32 0
+  %cast1 = bitcast i32* %gep1 to i8*
+  %cast0 = bitcast i32* %gep0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i32 1, i1 false)
+  ret void
+}