Mirror of https://github.com/c64scene-ar/llvm-6502.git
synced 2026-04-20 00:20:11 +00:00
Actually support volatile memcpys in NVPTX lowering
Differential Revision: http://reviews.llvm.org/D11091
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@241914 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -57,7 +57,6 @@ char NVPTXLowerAggrCopies::ID = 0;
|
||||
// Lower MemTransferInst or load-store pair to loop
|
||||
static void convertTransferToLoop(
|
||||
Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
|
||||
//unsigned numLoads,
|
||||
bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
|
||||
Type *indType = len->getType();
|
||||
|
||||
@@ -84,6 +83,8 @@ static void convertTransferToLoop(
|
||||
ind->addIncoming(ConstantInt::get(indType, 0), origBB);
|
||||
|
||||
// load from srcAddr+ind
|
||||
// TODO: we can leverage the align parameter of llvm.memcpy for more efficient
|
||||
// word-sized loads and stores.
|
||||
Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind),
|
||||
srcVolatile);
|
||||
// store at dstAddr+ind
|
||||
@@ -200,13 +201,14 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
|
||||
}
|
||||
|
||||
for (MemTransferInst *cpy : aggrMemcpys) {
|
||||
Value *len = cpy->getLength();
|
||||
// llvm 2.7 version of memcpy does not have volatile
|
||||
// operand yet. So always making it non-volatile
|
||||
// optimistically, so that we don't see unnecessary
|
||||
// st.volatile in ptx
|
||||
convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false,
|
||||
false, Context, F);
|
||||
convertTransferToLoop(/* splitAt */ cpy,
|
||||
/* srcAddr */ cpy->getSource(),
|
||||
/* dstAddr */ cpy->getDest(),
|
||||
/* len */ cpy->getLength(),
|
||||
/* srcVolatile */ cpy->isVolatile(),
|
||||
/* dstVolatile */ cpy->isVolatile(),
|
||||
/* Context */ Context,
|
||||
/* Function F */ F);
|
||||
cpy->eraseFromParent();
|
||||
}
|
||||
|
||||
|
||||
@@ -19,6 +19,19 @@ entry:
|
||||
; CHECK-NEXT: @%p[[PRED]] bra LBB[[LABEL]]
|
||||
}
|
||||
|
||||
define i8* @memcpy_volatile_caller(i8* %dst, i8* %src, i64 %n) #0 {
|
||||
entry:
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i32 1, i1 true)
|
||||
ret i8* %dst
|
||||
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) memcpy_volatile_caller
|
||||
; CHECK: LBB[[LABEL:[_0-9]+]]:
|
||||
; CHECK: ld.volatile.u8 %rs[[REG:[0-9]+]]
|
||||
; CHECK: st.volatile.u8 [%r{{[0-9]+}}], %rs[[REG]]
|
||||
; CHECK: add.s64 %rd[[COUNTER:[0-9]+]], %rd[[COUNTER]], 1
|
||||
; CHECK-NEXT: setp.lt.u64 %p[[PRED:[0-9]+]], %rd[[COUNTER]], %rd
|
||||
; CHECK-NEXT: @%p[[PRED]] bra LBB[[LABEL]]
|
||||
}
|
||||
|
||||
define i8* @memset_caller(i8* %dst, i32 %c, i64 %n) #0 {
|
||||
entry:
|
||||
%0 = trunc i32 %c to i8
|
||||
|
||||
Reference in New Issue
Block a user