diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 91c872d709c..35298565e58 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -87,14 +87,15 @@ void convertMemCpyToLoop(Instruction *ConvertedInst, Value *SrcAddr, // load from SrcAddr+LoopIndex // TODO: we can leverage the align parameter of llvm.memcpy for more efficient // word-sized loads and stores. - Value *Element = LoopBuilder.CreateLoad( - LoopBuilder.CreateGEP(LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex), - SrcIsVolatile); + Value *Element = + LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP( + LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex), + SrcIsVolatile); // store at DstAddr+LoopIndex - LoopBuilder.CreateStore( - Element, - LoopBuilder.CreateGEP(LoopBuilder.getInt8Ty(), DstAddr, LoopIndex), - DstIsVolatile); + LoopBuilder.CreateStore(Element, + LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(), + DstAddr, LoopIndex), + DstIsVolatile); // The value for LoopIndex coming from backedge is (LoopIndex + 1) Value *NewIndex = @@ -225,7 +226,8 @@ void convertMemSetToLoop(Instruction *ConvertedInst, Value *DstAddr, LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB); LoopBuilder.CreateStore( - SetValue, LoopBuilder.CreateGEP(SetValue->getType(), DstAddr, LoopIndex), + SetValue, + LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), false); Value *NewIndex = diff --git a/test/CodeGen/NVPTX/lower-aggr-copies.ll b/test/CodeGen/NVPTX/lower-aggr-copies.ll index 68d9c2322a6..ef570982b80 100644 --- a/test/CodeGen/NVPTX/lower-aggr-copies.ll +++ b/test/CodeGen/NVPTX/lower-aggr-copies.ll @@ -18,9 +18,9 @@ entry: ; IR-LABEL: @memcpy_caller ; IR: loadstoreloop: -; IR: [[LOADPTR:%[0-9]+]] = getelementptr i8, i8* %src, i64 +; IR: [[LOADPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %src, i64 ; IR-NEXT: [[VAL:%[0-9]+]] = load i8, i8* [[LOADPTR]] -; IR-NEXT: [[STOREPTR:%[0-9]+]] = getelementptr i8, i8* %dst, i64 +; IR-NEXT: [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 ; IR-NEXT: store i8 [[VAL]], i8* [[STOREPTR]] ; PTX-LABEL: .visible .func (.param .b64 func_retval0) memcpy_caller @@ -61,8 +61,8 @@ entry: ; IR-LABEL: @memcpy_casting_caller ; IR: [[DSTCAST:%[0-9]+]] = bitcast i32* %dst to i8* ; IR: [[SRCCAST:%[0-9]+]] = bitcast i32* %src to i8* -; IR: getelementptr i8, i8* [[SRCCAST]] -; IR: getelementptr i8, i8* [[DSTCAST]] +; IR: getelementptr inbounds i8, i8* [[SRCCAST]] +; IR: getelementptr inbounds i8, i8* [[DSTCAST]] } define i8* @memset_caller(i8* %dst, i32 %c, i64 %n) #0 { @@ -74,7 +74,7 @@ entry: ; IR-LABEL: @memset_caller ; IR: [[VAL:%[0-9]+]] = trunc i32 %c to i8 ; IR: loadstoreloop: -; IR: [[STOREPTR:%[0-9]+]] = getelementptr i8, i8* %dst, i64 +; IR: [[STOREPTR:%[0-9]+]] = getelementptr inbounds i8, i8* %dst, i64 ; IR-NEXT: store i8 [[VAL]], i8* [[STOREPTR]] ; PTX-LABEL: .visible .func (.param .b64 func_retval0) memset_caller(