mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-07 11:33:44 +00:00
6eaa62af77
Summary: Currently, call slot optimization requires that if the destination is an argument, the argument has the sret attribute. This is to ensure that the memory access won't trap. In addition to sret, we can also allow the optimization to happen for arguments that have the new dereferenceable attribute, which gives the same guarantee. Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D5832 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219950 91177308-0d34-0410-b5e6-96231b3b80d8
30 lines
1.4 KiB
LLVM
30 lines
1.4 KiB
LLVM
; Test driver line: this file is fed to opt with -basicaa and -memcpyopt, and
; the textual IR that opt prints (-S) is piped into FileCheck, which reads its
; expected patterns from this same file.
; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
|
|
; Data layout string applied to the whole module.
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
|
|
|
|
; Declarations of the two intrinsics called by the test functions in this file.
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) unnamed_addr nounwind
|
|
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
|
|
|
|
; All bytes of %dst that are touched by the memset are dereferenceable.
;
; %dst is declared dereferenceable(4096) and the memset/memcpy pair in the body
; covers exactly 4096 bytes. The FileCheck patterns inside the function require
; a memset call in the optimized output and forbid any remaining memcpy call.
|
|
define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) {
|
|
; CHECK-LABEL: @must_remove_memcpy(
|
|
; CHECK: call void @llvm.memset.p0i8.i64
|
|
; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
|
|
; Local 4096-byte stack buffer; %p is an i8* to its first element.
%src = alloca [4096 x i8], align 1
|
|
%p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0
|
|
; Fill all 4096 bytes of the local buffer with zero.
call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
|
|
; Copy the full 4096-byte local buffer into %dst.
; NOTE(review): attribute group #2 has no definition in the visible file;
; confirm the reference is intentional.
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
|
|
ret void
|
|
}
|
|
|
|
; The memset touches more bytes than those guaranteed to be dereferenceable.
;
; %dst is declared dereferenceable(1024) only, while the memset/memcpy pair in
; the body covers 4096 bytes. The FileCheck patterns inside the function
; require both the memset call and the memcpy call to still be present in the
; optimized output.
|
|
define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) {
|
|
; CHECK-LABEL: @must_not_remove_memcpy(
|
|
; CHECK: call void @llvm.memset.p0i8.i64
|
|
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
|
|
; Local 4096-byte stack buffer; %p is an i8* to its first element.
%src = alloca [4096 x i8], align 1
|
|
%p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0
|
|
; Fill all 4096 bytes of the local buffer with zero.
call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
|
|
; Copy the full 4096-byte local buffer into %dst (more than the 1024 bytes
; the parameter attribute covers).
; NOTE(review): attribute group #2 has no definition in the visible file;
; confirm the reference is intentional.
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
|
|
ret void
|
|
}
|