diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 8f6855e6a30..6fe5e188b1a 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -253,7 +253,12 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { return false; AllCallsAreTailCalls = true; + // The local stack holds all alloca instructions and all byval arguments. AllocaDerivedValueTracker Tracker; + for (Argument &Arg : F.args()) { + if (Arg.hasByValAttr()) + Tracker.walk(&Arg); + } for (auto &BB : F) { for (auto &I : BB) if (AllocaInst *AI = dyn_cast(&I)) @@ -309,8 +314,9 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { for (auto &Arg : CI->arg_operands()) { if (isa(Arg.getUser())) continue; - if (isa(Arg.getUser())) - continue; + if (Argument *A = dyn_cast(Arg.getUser())) + if (!A->hasByValAttr()) + continue; SafeToTail = false; break; } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 112bdcb72b1..a1e42bbe70c 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -743,8 +743,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, - unsigned ByValAlignment, - bool &AddedNewAllocas) { + unsigned ByValAlignment) { PointerType *ArgTy = cast(Arg->getType()); Type *AggTy = ArgTy->getElementType(); @@ -786,7 +785,6 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, // Uses of the argument in the function should use our new alloca // instead. 
- AddedNewAllocas = true; return NewAlloca; } @@ -960,7 +958,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, SmallVector Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; - bool AddedNewAllocas = false; { // Scope to destroy VMap after cloning. ValueToValueMapTy VMap; @@ -984,8 +981,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, - CalledFunc->getParamAlignment(ArgNo+1), - AddedNewAllocas); + CalledFunc->getParamAlignment(ArgNo+1)); if (ActualArg != *AI) ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } @@ -1100,18 +1096,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // f -> musttail g -> tail f ==> f -> tail f // f -> g -> musttail f ==> f -> f // f -> g -> tail f ==> f -> f - // - // If an alloca was introduced in the frame due to a byval parameter - // being passed to a subsequent call, tail calls must have the tail - // stripped as they may not access variables in the caller's stack. - // A single alloca ripples through out as the alloca may be aliased by - // bitcasts or may escape and be mutated outside of the function. 
CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); ChildTCK = std::min(CallSiteTailKind, ChildTCK); - if (AddedNewAllocas) - CI->setTailCallKind(CallInst::TCK_None); - else - CI->setTailCallKind(ChildTCK); + CI->setTailCallKind(ChildTCK); InlinedMustTailCalls |= CI->isMustTailCall(); // Calls inlined through a 'nounwind' call site should be marked diff --git a/test/Transforms/Inline/byval-tail-call.ll b/test/Transforms/Inline/byval-tail-call.ll index 95c31d2b826..154f3974b58 100644 --- a/test/Transforms/Inline/byval-tail-call.ll +++ b/test/Transforms/Inline/byval-tail-call.ll @@ -34,7 +34,7 @@ define void @frob(i32* %x) { ; CHECK: %[[VAL:.*]] = load i32* %x ; CHECK: store i32 %[[VAL]], i32* %[[POS]] ; CHECK: {{^ *}}call void @ext(i32* %[[POS]] -; CHECK: {{^ *}}call void @ext(i32* null) +; CHECK: tail call void @ext(i32* null) ; CHECK: ret void tail call void @qux(i32* byval %x) ret void diff --git a/test/Transforms/Inline/inline-tail.ll b/test/Transforms/Inline/inline-tail.ll index 565491adf5b..b40328e0a27 100644 --- a/test/Transforms/Inline/inline-tail.ll +++ b/test/Transforms/Inline/inline-tail.ll @@ -49,6 +49,42 @@ define void @test_musttail_basic_a(i32* %p) { ret void } +; Don't insert lifetime end markers here, the lifetime is trivially over due +; to the return. +; CHECK: define void @test_byval_a( +; CHECK: musttail call void @test_byval_c( +; CHECK-NEXT: ret void + +declare void @test_byval_c(i32* byval %p) +define internal void @test_byval_b(i32* byval %p) { + musttail call void @test_byval_c(i32* byval %p) + ret void +} +define void @test_byval_a(i32* byval %p) { + musttail call void @test_byval_b(i32* byval %p) + ret void +} + +; Don't insert a stack restore, we're about to return. 
+; CHECK: define void @test_dynalloca_a( +; CHECK: call i8* @llvm.stacksave( +; CHECK: alloca i8, i32 %n +; CHECK: musttail call void @test_dynalloca_c( +; CHECK-NEXT: ret void + +declare void @escape(i8* %buf) +declare void @test_dynalloca_c(i32* byval %p, i32 %n) +define internal void @test_dynalloca_b(i32* byval %p, i32 %n) alwaysinline { + %buf = alloca i8, i32 %n ; dynamic alloca + call void @escape(i8* %buf) ; escape it + musttail call void @test_dynalloca_c(i32* byval %p, i32 %n) + ret void +} +define void @test_dynalloca_a(i32* byval %p, i32 %n) { + musttail call void @test_dynalloca_b(i32* byval %p, i32 %n) + ret void +} + ; We can't merge the returns. ; CHECK: define void @test_multiret_a( ; CHECK: musttail call void @test_multiret_c( diff --git a/test/Transforms/Inline/inlined-allocas.ll b/test/Transforms/Inline/inlined-allocas.ll deleted file mode 100644 index 254989b28f8..00000000000 --- a/test/Transforms/Inline/inlined-allocas.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: opt -basicaa -dse -inline -S %s | FileCheck %s - -declare void @external(i32* byval) -declare i32 @identity(i32* byval) - -; An alloca in the inlinee should not force the tail to be stripped - -define void @inlinee_with_alloca() { - %local = alloca i32 - store i32 42, i32* %local, align 4 - tail call void @external(i32* byval %local) - ret void -} - -define void @inliner_without_alloca() { - tail call void @inlinee_with_alloca() - ret void -} - -; CHECK-LABEL: inliner_without_alloca -; CHECK-NEXT: %local.i = alloca i32 -; CHECK: tail call void @external -; CHECK: ret - -; An alloca in the inliner should not force the tail to be stripped - -define i32 @inliner_with_alloca() { - %local = alloca i32 - store i32 42, i32* %local, align 4 - %1 = tail call i32 @identity(i32* byval %local) - ret i32 %1 -} - -; CHECK-LABEL: inliner_with_alloca -; CHECK: %local = alloca i32 -; CHECK: %1 = tail call i32 @identity -; CHECK: ret i32 %1 - -; Force the synthesis of the value through the byval 
parameter. -; The alloca should force the tail to be stripped - -define void @inlinee_with_passthru(i32* byval %value) { - tail call void @external(i32* byval %value) - ret void -} - -define void @strip_tail(i32* %value) { - tail call void @inlinee_with_passthru(i32* %value) - ret void -} - -; CHECK-LABEL: strip_tail -; CHECK: %value1 = alloca i32 -; CHECK-NOT: tail call void @external -; CHECK: ret void - diff --git a/test/Transforms/TailCallElim/basic.ll b/test/Transforms/TailCallElim/basic.ll index 3b98f8c7967..8e9814b52bb 100644 --- a/test/Transforms/TailCallElim/basic.ll +++ b/test/Transforms/TailCallElim/basic.ll @@ -147,7 +147,7 @@ cond_false: ; Don't tail call if a byval arg is captured. define void @test9(i32* byval %a) { ; CHECK-LABEL: define void @test9( -; CHECK: tail call void @use( +; CHECK: {{^ *}}call void @use( call void @use(i32* %a) ret void } diff --git a/test/Transforms/TailCallElim/byval.ll b/test/Transforms/TailCallElim/byval.ll deleted file mode 100644 index 1150f7684e9..00000000000 --- a/test/Transforms/TailCallElim/byval.ll +++ /dev/null @@ -1,34 +0,0 @@ -; RUN: opt -mtriple i386 -Os -S %s -o - | FileCheck %s -; RUN: opt -mtriple x86_64 -Os -S %s -o - | FileCheck %s -; RUN: opt -mtriple armv7 -Os -S %s -o - | FileCheck %s - -%struct.D16 = type { [16 x double] } - -declare void @_Z2OpP3D16PKS_S2_(%struct.D16*, %struct.D16*, %struct.D16*) - -define void @_Z7TestRefRK3D16S1_(%struct.D16* noalias sret %agg.result, %struct.D16* %RHS, %struct.D16* %LHS) { - %1 = alloca %struct.D16*, align 8 - %2 = alloca %struct.D16*, align 8 - store %struct.D16* %RHS, %struct.D16** %1, align 8 - store %struct.D16* %LHS, %struct.D16** %2, align 8 - %3 = load %struct.D16** %1, align 8 - %4 = load %struct.D16** %2, align 8 - call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %3, %struct.D16* %4) - ret void -} - -; CHECK: define void @_Z7TestRefRK3D16S1_({{.*}}) { -; CHECK: tail call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* 
%RHS, %struct.D16* %LHS) -; CHECK: ret void -; CHECK: } - -define void @_Z7TestVal3D16S_(%struct.D16* noalias sret %agg.result, %struct.D16* byval align 8 %RHS, %struct.D16* byval align 8 %LHS) { - call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %RHS, %struct.D16* %LHS) - ret void -} - -; CHECK: define void @_Z7TestVal3D16S_({{.*}}) { -; CHECK: tail call void @_Z2OpP3D16PKS_S2_(%struct.D16* %agg.result, %struct.D16* %RHS, %struct.D16* %LHS) -; CHECK: ret void -; CHECK: } -