diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 4a800fcbdaf..ff084013770 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -461,14 +461,30 @@ private: void visitMemTransferInst(MemTransferInst &II) { ConstantInt *Length = dyn_cast(II.getLength()); - if ((Length && Length->getValue() == 0) || - (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize))) + if (Length && Length->getValue() == 0) // Zero-length mem transfer intrinsics can be ignored entirely. return markAsDead(II); + // Because we can visit these intrinsics twice, also check to see if the + // first time marked this instruction as dead. If so, skip it. + if (VisitedDeadInsts.count(&II)) + return; + if (!IsOffsetKnown) return PI.setAborted(&II); + // This side of the transfer is completely out-of-bounds, and so we can + // nuke the entire transfer. However, we also need to nuke the other side + // if already added to our partitions. + // FIXME: Yet another place we really should bypass this when + // instrumenting for ASan. + if (!Offset.isNegative() && Offset.uge(AllocSize)) { + SmallDenseMap::iterator MTPI = MemTransferSliceMap.find(&II); + if (MTPI != MemTransferSliceMap.end()) + S.Slices[MTPI->second].kill(); + return markAsDead(II); + } + uint64_t RawOffset = Offset.getLimitedValue(); uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset; @@ -917,6 +933,7 @@ private: ArrayRef SplitUses); bool splitAlloca(AllocaInst &AI, AllocaSlices &S); bool runOnAlloca(AllocaInst &AI); + void clobberUse(Use &U); void deleteDeadInstructions(SmallPtrSet &DeletedAllocas); bool promoteAllocas(Function &F); }; @@ -2497,8 +2514,11 @@ private: // alloca that should be re-examined after rewriting this instruction. Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); if (AllocaInst *AI - = dyn_cast(OtherPtr->stripInBoundsOffsets())) + = dyn_cast(OtherPtr->stripInBoundsOffsets())) { + assert(AI != &OldAI && AI != &NewAI && + "Splittable transfers cannot reach the same alloca on both ends."); Pass.Worklist.insert(AI); + } if (EmitMemCpy) { Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() @@ -3328,6 +3348,21 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &S) { return Changed; } +/// \brief Clobber a use with undef, deleting the used value if it becomes dead. +void SROA::clobberUse(Use &U) { + Value *OldV = U; + // Replace the use with an undef value. + U = UndefValue::get(OldV->getType()); + + // Check for this making an instruction dead. We have to garbage collect + // all the dead instructions to ensure the uses of any alloca end up being + // minimal. + if (Instruction *OldI = dyn_cast(OldV)) + if (isInstructionTriviallyDead(OldI)) { + DeadInsts.insert(OldI); + } +} + /// \brief Analyze an alloca for SROA. /// /// This analyzes the alloca to ensure we can reason about it, builds @@ -3365,21 +3400,23 @@ bool SROA::runOnAlloca(AllocaInst &AI) { for (AllocaSlices::dead_user_iterator DI = S.dead_user_begin(), DE = S.dead_user_end(); DI != DE; ++DI) { - Changed = true; + // Free up everything used by this instruction. + for (User::op_iterator DOI = (*DI)->op_begin(), DOE = (*DI)->op_end(); + DOI != DOE; ++DOI) + clobberUse(*DOI); + + // Now replace the uses of this instruction. (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType())); + + // And mark it for deletion. DeadInsts.insert(*DI); + Changed = true; } for (AllocaSlices::dead_op_iterator DO = S.dead_op_begin(), DE = S.dead_op_end(); DO != DE; ++DO) { - Value *OldV = **DO; - // Clobber the use with an undef value. - **DO = UndefValue::get(OldV->getType()); - if (Instruction *OldI = dyn_cast(OldV)) - if (isInstructionTriviallyDead(OldI)) { - Changed = true; - DeadInsts.insert(OldI); - } + clobberUse(**DO); + Changed = true; } // No slices to split. Leave the dead alloca for a later pass to clean up. diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index 5d3e4b5d8b2..82b7de359e9 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -1356,3 +1356,18 @@ entry: %cond105.i.i = load float* %cond105.in.i.i, align 8 ret void } + +define void @test23(i32 %x) { +; CHECK-LABEL: @test23( +; CHECK-NOT: alloca +; CHECK: ret void +entry: + %a = alloca i32, align 4 + store i32 %x, i32* %a, align 4 + %gep1 = getelementptr inbounds i32* %a, i32 1 + %gep0 = getelementptr inbounds i32* %a, i32 0 + %cast1 = bitcast i32* %gep1 to i8* + %cast0 = bitcast i32* %gep0 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast0, i32 4, i32 1, i1 false) + ret void +}