diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 3f50ccdaf5a..bca9fc4549a 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -229,6 +229,71 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallerNode->removeCallEdgeFor(CS); } +static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, + const Function *CalledFunc, + InlineFunctionInfo &IFI, + unsigned ByValAlignment) { + if (CalledFunc->onlyReadsMemory()) + return Arg; + + LLVMContext &Context = Arg->getContext(); + + + const Type *AggTy = cast(Arg->getType())->getElementType(); + const Type *VoidPtrTy = Type::getInt8PtrTy(Context); + + // Create the alloca. If we have TargetData, use nice alignment. + unsigned Align = 1; + if (IFI.TD) + Align = IFI.TD->getPrefTypeAlignment(AggTy); + + // If the byval had an alignment specified, we *must* use at least that + // alignment, as it is required by the byval argument (and uses of the + // pointer inside the callee). + Align = std::max(Align, ByValAlignment); + + Function *Caller = TheCall->getParent()->getParent(); + + Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), + &*Caller->begin()->begin()); + // Emit a memcpy. + const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; + Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), + Intrinsic::memcpy, + Tys, 3); + Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); + Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); + + Value *Size; + if (IFI.TD == 0) + Size = ConstantExpr::getSizeOf(AggTy); + else + Size = ConstantInt::get(Type::getInt64Ty(Context), + IFI.TD->getTypeStoreSize(AggTy)); + + // Always generate a memcpy of alignment 1 here because we don't know + // the alignment of the src pointer. Other optimizations can infer + // better alignment. + Value *CallArgs[] = { + DestCast, SrcCast, Size, + ConstantInt::get(Type::getInt32Ty(Context), 1), + ConstantInt::getFalse(Context) // isVolatile + }; + CallInst *TheMemCpy = + CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); + + // If we have a call graph, update it. + if (CallGraph *CG = IFI.CG) { + CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn); + CallGraphNode *CallerNode = (*CG)[Caller]; + CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN); + } + + // Uses of the argument in the function should use our new alloca + // instead. + return NewAlloca; +} + // InlineFunction - This function inlines the called function into the basic // block of the caller. This returns false if it is not possible to inline this // call. The program is still in a well defined state if this occurs though. @@ -304,63 +369,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. - if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) && - !CalledFunc->onlyReadsMemory()) { - const Type *AggTy = cast(I->getType())->getElementType(); - const Type *VoidPtrTy = Type::getInt8PtrTy(Context); - - // Create the alloca. If we have TargetData, use nice alignment. - unsigned Align = 1; - if (IFI.TD) - Align = IFI.TD->getPrefTypeAlignment(AggTy); - - // If the byval had an alignment specified, we *must* use at least that - // alignment, as it is required by the byval argument (and uses of the - // pointer inside the callee). - Align = std::max(Align, CalledFunc->getParamAlignment(ArgNo+1)); - - Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), - &*Caller->begin()->begin()); - // Emit a memcpy. - const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; - Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), - Intrinsic::memcpy, - Tys, 3); - Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); - Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall); - - Value *Size; - if (IFI.TD == 0) - Size = ConstantExpr::getSizeOf(AggTy); - else - Size = ConstantInt::get(Type::getInt64Ty(Context), - IFI.TD->getTypeStoreSize(AggTy)); - - // Always generate a memcpy of alignment 1 here because we don't know - // the alignment of the src pointer. Other optimizations can infer - // better alignment. - Value *CallArgs[] = { - DestCast, SrcCast, Size, - ConstantInt::get(Type::getInt32Ty(Context), 1), - ConstantInt::getFalse(Context) // isVolatile - }; - CallInst *TheMemCpy = - CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall); - - // If we have a call graph, update it. - if (CallGraph *CG = IFI.CG) { - CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn); - CallGraphNode *CallerNode = (*CG)[Caller]; - CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN); - } - - // Uses of the argument in the function should use our new alloca - // instead. - ActualArg = NewAlloca; - + if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) { + ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, + CalledFunc->getParamAlignment(ArgNo+1)); + // Calls that we inline may use the new alloca, so we need to clear - // their 'tail' flags. - MustClearTailCallFlags = true; + // their 'tail' flags if HandleByValArgument introduced a new alloca and + // the callee has calls. + MustClearTailCallFlags |= ActualArg != *AI; } VMap[I] = ActualArg; diff --git a/test/Transforms/Inline/byval.ll b/test/Transforms/Inline/byval.ll index da0beab9dc7..08219fcec8d 100644 --- a/test/Transforms/Inline/byval.ll +++ b/test/Transforms/Inline/byval.ll @@ -26,7 +26,8 @@ entry: call void @f( %struct.ss* byval %S ) nounwind ret i32 0 ; CHECK: @test1() -; CHECK: %b = alloca %struct.ss +; CHECK: %S1 = alloca %struct.ss +; CHECK: %S = alloca %struct.ss ; CHECK: call void @llvm.memcpy ; CHECK: ret i32 0 } @@ -74,9 +75,9 @@ entry: call void @f3( %struct.ss* byval align 64 %S) nounwind ret void ; CHECK: @test3() -; CHECK: %b = alloca %struct.ss, align 64 +; CHECK: %S1 = alloca %struct.ss, align 64 ; CHECK: %S = alloca %struct.ss ; CHECK: call void @llvm.memcpy -; CHECK: call void @g3(%struct.ss* %b) +; CHECK: call void @g3(%struct.ss* %S1) ; CHECK: ret void }