mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-10-03 12:18:00 +00:00
Inliner: Handle readonly attribute per argument when adding memcpy
Patch by: Vincent Lejeune
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193356 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -337,33 +337,35 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
|
|||||||
|
|
||||||
/// HandleByValArgument - When inlining a call site that has a byval argument,
|
/// HandleByValArgument - When inlining a call site that has a byval argument,
|
||||||
/// we have to make the implicit memcpy explicit by adding it.
|
/// we have to make the implicit memcpy explicit by adding it.
|
||||||
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
|
static Value *HandleByValArgument(Value *PassedValue,
|
||||||
|
const Argument *ArgumentSignature,
|
||||||
|
Instruction *TheCall,
|
||||||
const Function *CalledFunc,
|
const Function *CalledFunc,
|
||||||
InlineFunctionInfo &IFI,
|
InlineFunctionInfo &IFI,
|
||||||
unsigned ByValAlignment) {
|
unsigned ByValAlignment) {
|
||||||
Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
|
Type *AggTy = cast<PointerType>(PassedValue->getType())->getElementType();
|
||||||
|
|
||||||
// If the called function is readonly, then it could not mutate the caller's
|
// If the called function is readonly, then it could not mutate the caller's
|
||||||
// copy of the byval'd memory. In this case, it is safe to elide the copy and
|
// copy of the byval'd memory. In this case, it is safe to elide the copy and
|
||||||
// temporary.
|
// temporary.
|
||||||
if (CalledFunc->onlyReadsMemory()) {
|
if (CalledFunc->onlyReadsMemory() || ArgumentSignature->onlyReadsMemory()) {
|
||||||
// If the byval argument has a specified alignment that is greater than the
|
// If the byval argument has a specified alignment that is greater than the
|
||||||
// passed in pointer, then we either have to round up the input pointer or
|
// passed in pointer, then we either have to round up the input pointer or
|
||||||
// give up on this transformation.
|
// give up on this transformation.
|
||||||
if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
|
if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
|
||||||
return Arg;
|
return PassedValue;
|
||||||
|
|
||||||
// If the pointer is already known to be sufficiently aligned, or if we can
|
// If the pointer is already known to be sufficiently aligned, or if we can
|
||||||
// round it up to a larger alignment, then we don't need a temporary.
|
// round it up to a larger alignment, then we don't need a temporary.
|
||||||
if (getOrEnforceKnownAlignment(Arg, ByValAlignment,
|
if (getOrEnforceKnownAlignment(PassedValue, ByValAlignment,
|
||||||
IFI.TD) >= ByValAlignment)
|
IFI.TD) >= ByValAlignment)
|
||||||
return Arg;
|
return PassedValue;
|
||||||
|
|
||||||
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
|
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
|
||||||
// for code quality, but rarely happens and is required for correctness.
|
// for code quality, but rarely happens and is required for correctness.
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVMContext &Context = Arg->getContext();
|
LLVMContext &Context = PassedValue->getContext();
|
||||||
|
|
||||||
Type *VoidPtrTy = Type::getInt8PtrTy(Context);
|
Type *VoidPtrTy = Type::getInt8PtrTy(Context);
|
||||||
|
|
||||||
@@ -379,7 +381,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
|
|||||||
|
|
||||||
Function *Caller = TheCall->getParent()->getParent();
|
Function *Caller = TheCall->getParent()->getParent();
|
||||||
|
|
||||||
Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
|
Value *NewAlloca = new AllocaInst(AggTy, 0, Align, PassedValue->getName(),
|
||||||
&*Caller->begin()->begin());
|
&*Caller->begin()->begin());
|
||||||
// Emit a memcpy.
|
// Emit a memcpy.
|
||||||
Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
|
Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
|
||||||
@@ -387,7 +389,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
|
|||||||
Intrinsic::memcpy,
|
Intrinsic::memcpy,
|
||||||
Tys);
|
Tys);
|
||||||
Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
|
Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
|
||||||
Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
|
Value *SrcCast = new BitCastInst(PassedValue, VoidPtrTy, "tmp", TheCall);
|
||||||
|
|
||||||
Value *Size;
|
Value *Size;
|
||||||
if (IFI.TD == 0)
|
if (IFI.TD == 0)
|
||||||
@@ -588,13 +590,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
|
|||||||
for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
|
for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
|
||||||
E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
|
E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
|
||||||
Value *ActualArg = *AI;
|
Value *ActualArg = *AI;
|
||||||
|
const Argument *Arg = I;
|
||||||
|
|
||||||
// When byval arguments actually inlined, we need to make the copy implied
|
// When byval arguments actually inlined, we need to make the copy implied
|
||||||
// by them explicit. However, we don't do this if the callee is readonly
|
// by them explicit. However, we don't do this if the callee is readonly
|
||||||
// or readnone, because the copy would be unneeded: the callee doesn't
|
// or readnone, because the copy would be unneeded: the callee doesn't
|
||||||
// modify the struct.
|
// modify the struct.
|
||||||
if (CS.isByValArgument(ArgNo)) {
|
if (CS.isByValArgument(ArgNo)) {
|
||||||
ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
|
ActualArg = HandleByValArgument(ActualArg, Arg, TheCall, CalledFunc, IFI,
|
||||||
CalledFunc->getParamAlignment(ArgNo+1));
|
CalledFunc->getParamAlignment(ArgNo+1));
|
||||||
|
|
||||||
// Calls that we inline may use the new alloca, so we need to clear
|
// Calls that we inline may use the new alloca, so we need to clear
|
||||||
|
@@ -25,7 +25,7 @@ entry:
|
|||||||
store i64 2, i64* %tmp4, align 4
|
store i64 2, i64* %tmp4, align 4
|
||||||
call void @f( %struct.ss* byval %S ) nounwind
|
call void @f( %struct.ss* byval %S ) nounwind
|
||||||
ret i32 0
|
ret i32 0
|
||||||
; CHECK: @test1()
|
; CHECK-LABEL: @test1()
|
||||||
; CHECK: %S1 = alloca %struct.ss
|
; CHECK: %S1 = alloca %struct.ss
|
||||||
; CHECK: %S = alloca %struct.ss
|
; CHECK: %S = alloca %struct.ss
|
||||||
; CHECK: call void @llvm.memcpy
|
; CHECK: call void @llvm.memcpy
|
||||||
@@ -52,7 +52,7 @@ entry:
|
|||||||
store i64 2, i64* %tmp4, align 4
|
store i64 2, i64* %tmp4, align 4
|
||||||
%X = call i32 @f2( %struct.ss* byval %S ) nounwind
|
%X = call i32 @f2( %struct.ss* byval %S ) nounwind
|
||||||
ret i32 %X
|
ret i32 %X
|
||||||
; CHECK: @test2()
|
; CHECK-LABEL: @test2()
|
||||||
; CHECK: %S = alloca %struct.ss
|
; CHECK: %S = alloca %struct.ss
|
||||||
; CHECK-NOT: call void @llvm.memcpy
|
; CHECK-NOT: call void @llvm.memcpy
|
||||||
; CHECK: ret i32
|
; CHECK: ret i32
|
||||||
@@ -74,7 +74,7 @@ entry:
|
|||||||
%S = alloca %struct.ss, align 1 ;; May not be aligned.
|
%S = alloca %struct.ss, align 1 ;; May not be aligned.
|
||||||
call void @f3( %struct.ss* byval align 64 %S) nounwind
|
call void @f3( %struct.ss* byval align 64 %S) nounwind
|
||||||
ret void
|
ret void
|
||||||
; CHECK: @test3()
|
; CHECK-LABEL: @test3()
|
||||||
; CHECK: %S1 = alloca %struct.ss, align 64
|
; CHECK: %S1 = alloca %struct.ss, align 64
|
||||||
; CHECK: %S = alloca %struct.ss
|
; CHECK: %S = alloca %struct.ss
|
||||||
; CHECK: call void @llvm.memcpy
|
; CHECK: call void @llvm.memcpy
|
||||||
@@ -97,10 +97,35 @@ entry:
|
|||||||
%S = alloca %struct.ss, align 2 ; <%struct.ss*> [#uses=4]
|
%S = alloca %struct.ss, align 2 ; <%struct.ss*> [#uses=4]
|
||||||
%X = call i32 @f4( %struct.ss* byval align 64 %S ) nounwind
|
%X = call i32 @f4( %struct.ss* byval align 64 %S ) nounwind
|
||||||
ret i32 %X
|
ret i32 %X
|
||||||
; CHECK: @test4()
|
; CHECK-LABEL: @test4()
|
||||||
; CHECK: %S = alloca %struct.ss, align 64
|
; CHECK: %S = alloca %struct.ss, align 64
|
||||||
; CHECK-NOT: call void @llvm.memcpy
|
; CHECK-NOT: call void @llvm.memcpy
|
||||||
; CHECK: call void @g3
|
; CHECK: call void @g3
|
||||||
; CHECK: ret i32 4
|
; CHECK: ret i32 4
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; Inlining a byval struct should NOT cause an explicit copy
|
||||||
|
; into an alloca if the parameter is readonly
|
||||||
|
|
||||||
|
define internal i32 @f5(%struct.ss* byval readonly %b) nounwind {
|
||||||
|
entry:
|
||||||
|
%tmp = getelementptr %struct.ss* %b, i32 0, i32 0 ; <i32*> [#uses=2]
|
||||||
|
%tmp1 = load i32* %tmp, align 4 ; <i32> [#uses=1]
|
||||||
|
%tmp2 = add i32 %tmp1, 1 ; <i32> [#uses=1]
|
||||||
|
ret i32 %tmp2
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @test5() nounwind {
|
||||||
|
entry:
|
||||||
|
%S = alloca %struct.ss ; <%struct.ss*> [#uses=4]
|
||||||
|
%tmp1 = getelementptr %struct.ss* %S, i32 0, i32 0 ; <i32*> [#uses=1]
|
||||||
|
store i32 1, i32* %tmp1, align 8
|
||||||
|
%tmp4 = getelementptr %struct.ss* %S, i32 0, i32 1 ; <i64*> [#uses=1]
|
||||||
|
store i64 2, i64* %tmp4, align 4
|
||||||
|
%X = call i32 @f5( %struct.ss* byval %S ) nounwind
|
||||||
|
ret i32 %X
|
||||||
|
; CHECK-LABEL: @test5()
|
||||||
|
; CHECK: %S = alloca %struct.ss
|
||||||
|
; CHECK-NOT: call void @llvm.memcpy
|
||||||
|
; CHECK: ret i32
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user