mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-16 14:31:59 +00:00
[ARM] Align stack objects passed to memory intrinsics
Memcpy, and other memory intrinsics, typically tries to use LDM/STM if the source and target addresses are 4-byte aligned. In CodeGenPrepare look for calls to memory intrinsics and, if the object is on the stack, 4-byte align it if it's large enough that we expect that memcpy would want to use LDM/STM to copy it. Differential Revision: http://reviews.llvm.org/D7908 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232627 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
bf60cd0751
commit
0328ca6cd7
@ -976,6 +976,15 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return true if the pointer arguments to CI should be aligned by aligning
|
||||||
|
/// the object whose address is being passed. If so then MinSize is set to the
|
||||||
|
/// minimum size the object must be to be aligned and PrefAlign is set to the
|
||||||
|
/// preferred alignment.
/// This default implementation ignores all three arguments and returns false,
/// so MinSize and PrefAlign are left unwritten.
|
||||||
|
virtual bool shouldAlignPointerArgs(CallInst */*CI*/, unsigned &/*MinSize*/,
|
||||||
|
unsigned &/*PrefAlign*/) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
//===--------------------------------------------------------------------===//
|
//===--------------------------------------------------------------------===//
|
||||||
/// \name Helpers for TargetTransformInfo implementations
|
/// \name Helpers for TargetTransformInfo implementations
|
||||||
/// @{
|
/// @{
|
||||||
|
@ -1228,6 +1228,42 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
|
||||||
|
|
||||||
|
// Align the pointer arguments to this call if the target thinks it's a good
|
||||||
|
// idea
|
||||||
|
unsigned MinSize, PrefAlign;
|
||||||
|
if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
|
||||||
|
for (auto &Arg : CI->arg_operands()) {
|
||||||
|
// We want to align both objects whose address is used directly and
|
||||||
|
// objects whose address is used in casts and GEPs, though it only makes
|
||||||
|
// sense for GEPs if the offset is a multiple of the desired alignment and
|
||||||
|
// if size - offset meets the size threshold.
|
||||||
|
if (!Arg->getType()->isPointerTy())
|
||||||
|
continue;
|
||||||
|
APInt Offset(TD->getPointerSizeInBits(
|
||||||
|
cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
|
||||||
|
Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
|
||||||
|
uint64_t Offset2 = Offset.getLimitedValue();
|
||||||
|
AllocaInst *AI;
|
||||||
|
if ((Offset2 & (PrefAlign-1)) == 0 &&
|
||||||
|
(AI = dyn_cast<AllocaInst>(Val)) &&
|
||||||
|
AI->getAlignment() < PrefAlign &&
|
||||||
|
TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
|
||||||
|
AI->setAlignment(PrefAlign);
|
||||||
|
// TODO: Also align GlobalVariables
|
||||||
|
}
|
||||||
|
// If this is a memcpy (or similar) then we may be able to improve the
|
||||||
|
// alignment
|
||||||
|
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
|
||||||
|
unsigned Align = getKnownAlignment(MI->getDest(), *TD);
|
||||||
|
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
|
||||||
|
Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD));
|
||||||
|
if (Align > MI->getAlignment())
|
||||||
|
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
|
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
|
||||||
if (II) {
|
if (II) {
|
||||||
switch (II->getIntrinsicID()) {
|
switch (II->getIntrinsicID()) {
|
||||||
|
@ -42,6 +42,7 @@
|
|||||||
#include "llvm/IR/Instruction.h"
|
#include "llvm/IR/Instruction.h"
|
||||||
#include "llvm/IR/Instructions.h"
|
#include "llvm/IR/Instructions.h"
|
||||||
#include "llvm/IR/Intrinsics.h"
|
#include "llvm/IR/Intrinsics.h"
|
||||||
|
#include "llvm/IR/IntrinsicInst.h"
|
||||||
#include "llvm/IR/Type.h"
|
#include "llvm/IR/Type.h"
|
||||||
#include "llvm/MC/MCSectionMachO.h"
|
#include "llvm/MC/MCSectionMachO.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
@ -1163,6 +1164,20 @@ const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
|
|||||||
return TargetLowering::getRegClassFor(VT);
|
return TargetLowering::getRegClassFor(VT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// memcpy, and other memory intrinsics, typically tries to use LDM/STM if the
|
||||||
|
// source/dest is aligned and the copy size is large enough. We therefore want
|
||||||
|
// to align such objects passed to memory intrinsics.
// Returns false, without writing MinSize or PrefAlign, when CI is not a
// MemIntrinsic. Otherwise sets MinSize to 8, sets PrefAlign to 8 or 4
// depending on the subtarget, and returns true.
|
||||||
|
bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
|
||||||
|
unsigned &PrefAlign) const {
|
||||||
|
// Only calls to memory intrinsics are considered.
if (!isa<MemIntrinsic>(CI))
|
||||||
|
return false;
|
||||||
|
// Smallest object size (in bytes) for which alignment is requested.
MinSize = 8;
|
||||||
|
// On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
|
||||||
|
// cycle faster than 4-byte aligned LDM.
|
||||||
|
PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Create a fast isel object.
|
// Create a fast isel object.
|
||||||
FastISel *
|
FastISel *
|
||||||
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
|
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
|
||||||
|
@ -368,6 +368,9 @@ namespace llvm {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// ARM override of the pointer-argument alignment hook. On a true return,
/// MinSize and PrefAlign carry the size threshold and preferred alignment.
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
|
||||||
|
unsigned &PrefAlign) const override;
|
||||||
|
|
||||||
/// createFastISel - This method returns a target specific FastISel object,
|
/// createFastISel - This method returns a target specific FastISel object,
|
||||||
/// or null if the target does not support "fast" ISel.
|
/// or null if the target does not support "fast" ISel.
|
||||||
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
|
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
|
||||||
|
@ -1,31 +1,284 @@
|
|||||||
; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s
|
; RUN: llc < %s -mtriple=armv7-apple-ios -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-IOS --check-prefix=CHECK
|
||||||
; RUN: llc < %s -mtriple=thumbv7m-none-macho -o - | FileCheck %s --check-prefix=DARWIN
|
; RUN: llc < %s -mtriple=thumbv7m-none-macho -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-DARWIN --check-prefix=CHECK
|
||||||
; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s
|
; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
|
||||||
; RUN: llc < %s -mtriple=arm-none-eabihf -o - | FileCheck --check-prefix=EABI %s
|
; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
|
||||||
|
|
||||||
@from = common global [500 x i32] zeroinitializer, align 4
|
@from = common global [500 x i32] zeroinitializer, align 4
|
||||||
@to = common global [500 x i32] zeroinitializer, align 4
|
@to = common global [500 x i32] zeroinitializer, align 4
|
||||||
|
|
||||||
define void @f() {
|
define void @f1() {
|
||||||
entry:
|
entry:
|
||||||
|
; CHECK-LABEL: f1
|
||||||
|
|
||||||
; CHECK: memmove
|
; CHECK-IOS: memmove
|
||||||
; EABI: __aeabi_memmove
|
; CHECK-DARWIN: memmove
|
||||||
call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
|
; CHECK-EABI: __aeabi_memmove
|
||||||
|
call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
|
||||||
|
|
||||||
; CHECK: memcpy
|
; CHECK-IOS: memcpy
|
||||||
; EABI: __aeabi_memcpy
|
; CHECK-DARWIN: memcpy
|
||||||
call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
|
; CHECK-EABI: __aeabi_memcpy
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
|
||||||
|
|
||||||
; EABI memset swaps arguments
|
; EABI memset swaps arguments
|
||||||
; CHECK: mov r1, #0
|
; CHECK-IOS: mov r1, #0
|
||||||
; CHECK: memset
|
; CHECK-IOS: memset
|
||||||
; DARWIN: movs r1, #0
|
; CHECK-DARWIN: movs r1, #0
|
||||||
; DARWIN: memset
|
; CHECK-DARWIN: memset
|
||||||
; EABI: mov r2, #0
|
; CHECK-EABI: mov r2, #0
|
||||||
; EABI: __aeabi_memset
|
; CHECK-EABI: __aeabi_memset
|
||||||
call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
|
call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
|
||||||
unreachable
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check that alloca arguments to memory intrinsics are automatically aligned if at least 8 bytes in size
|
||||||
|
define void @f2(i8* %dest, i32 %n) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: f2
|
||||||
|
|
||||||
|
; IOS (ARMv7) should 8-byte align, others should 4-byte align
|
||||||
|
; CHECK-IOS: add r1, sp, #32
|
||||||
|
; CHECK-IOS: memmove
|
||||||
|
; CHECK-DARWIN: add r1, sp, #28
|
||||||
|
; CHECK-DARWIN: memmove
|
||||||
|
; CHECK-EABI: add r1, sp, #28
|
||||||
|
; CHECK-EABI: __aeabi_memmove
|
||||||
|
%arr0 = alloca [9 x i8], align 1
|
||||||
|
%0 = bitcast [9 x i8]* %arr0 to i8*
|
||||||
|
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: add r1, sp, #16
|
||||||
|
; CHECK-IOS: memcpy
|
||||||
|
; CHECK-DARWIN: memcpy
|
||||||
|
; CHECK-EABI: __aeabi_memcpy
|
||||||
|
%arr1 = alloca [9 x i8], align 1
|
||||||
|
%1 = bitcast [9 x i8]* %arr1 to i8*
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK-IOS: mov r0, sp
|
||||||
|
; CHECK-IOS: mov r1, #0
|
||||||
|
; CHECK-IOS: memset
|
||||||
|
; CHECK-DARWIN: add r0, sp, #4
|
||||||
|
; CHECK-DARWIN: movs r1, #0
|
||||||
|
; CHECK-DARWIN: memset
|
||||||
|
; CHECK-EABI: add r0, sp, #4
|
||||||
|
; CHECK-EABI: mov r2, #0
|
||||||
|
; CHECK-EABI: __aeabi_memset
|
||||||
|
%arr2 = alloca [9 x i8], align 1
|
||||||
|
%2 = bitcast [9 x i8]* %arr2 to i8*
|
||||||
|
call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check that alloca arguments are not aligned if less than 8 bytes in size
|
||||||
|
define void @f3(i8* %dest, i32 %n) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: f3
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r1, sp, #17|sub(.w)? r1, r7, #15}}
|
||||||
|
; CHECK-IOS: memmove
|
||||||
|
; CHECK-DARWIN: memmove
|
||||||
|
; CHECK-EABI: __aeabi_memmove
|
||||||
|
%arr0 = alloca [7 x i8], align 1
|
||||||
|
%0 = bitcast [7 x i8]* %arr0 to i8*
|
||||||
|
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r1, sp, #10}}
|
||||||
|
; CHECK-IOS: memcpy
|
||||||
|
; CHECK-DARWIN: memcpy
|
||||||
|
; CHECK-EABI: __aeabi_memcpy
|
||||||
|
%arr1 = alloca [7 x i8], align 1
|
||||||
|
%1 = bitcast [7 x i8]* %arr1 to i8*
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r0, sp, #3}}
|
||||||
|
; CHECK-IOS: mov r1, #0
|
||||||
|
; CHECK-IOS: memset
|
||||||
|
; CHECK-DARWIN: movs r1, #0
|
||||||
|
; CHECK-DARWIN: memset
|
||||||
|
; CHECK-EABI: mov r2, #0
|
||||||
|
; CHECK-EABI: __aeabi_memset
|
||||||
|
%arr2 = alloca [7 x i8], align 1
|
||||||
|
%2 = bitcast [7 x i8]* %arr2 to i8*
|
||||||
|
call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check that alloca arguments are not aligned if size minus offset is less than 8 bytes
|
||||||
|
define void @f4(i8* %dest, i32 %n) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: f4
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #23|sub(.w)? r., r7, #17}}
|
||||||
|
; CHECK-IOS: memmove
|
||||||
|
; CHECK-DARWIN: memmove
|
||||||
|
; CHECK-EABI: __aeabi_memmove
|
||||||
|
%arr0 = alloca [9 x i8], align 1
|
||||||
|
%0 = getelementptr inbounds [9 x i8], [9 x i8]* %arr0, i32 0, i32 4
|
||||||
|
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #(10|14)}}
|
||||||
|
; CHECK-IOS: memcpy
|
||||||
|
; CHECK-DARWIN: memcpy
|
||||||
|
; CHECK-EABI: __aeabi_memcpy
|
||||||
|
%arr1 = alloca [9 x i8], align 1
|
||||||
|
%1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 4
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #(1|5)}}
|
||||||
|
; CHECK-IOS: mov r1, #0
|
||||||
|
; CHECK-IOS: memset
|
||||||
|
; CHECK-DARWIN: movs r1, #0
|
||||||
|
; CHECK-DARWIN: memset
|
||||||
|
; CHECK-EABI: mov r2, #0
|
||||||
|
; CHECK-EABI: __aeabi_memset
|
||||||
|
%arr2 = alloca [9 x i8], align 1
|
||||||
|
%2 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 4
|
||||||
|
call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check that alloca arguments are not aligned if the offset is not a multiple of 4
|
||||||
|
define void @f5(i8* %dest, i32 %n) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: f5
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}}
|
||||||
|
; CHECK-IOS: memmove
|
||||||
|
; CHECK-DARWIN: memmove
|
||||||
|
; CHECK-EABI: __aeabi_memmove
|
||||||
|
%arr0 = alloca [13 x i8], align 1
|
||||||
|
%0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 1
|
||||||
|
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #(10|14)}}
|
||||||
|
; CHECK-IOS: memcpy
|
||||||
|
; CHECK-DARWIN: memcpy
|
||||||
|
; CHECK-EABI: __aeabi_memcpy
|
||||||
|
%arr1 = alloca [13 x i8], align 1
|
||||||
|
%1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 1
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #(1|5)}}
|
||||||
|
; CHECK-IOS: mov r1, #0
|
||||||
|
; CHECK-IOS: memset
|
||||||
|
; CHECK-DARWIN: movs r1, #0
|
||||||
|
; CHECK-DARWIN: memset
|
||||||
|
; CHECK-EABI: mov r2, #0
|
||||||
|
; CHECK-EABI: __aeabi_memset
|
||||||
|
%arr2 = alloca [13 x i8], align 1
|
||||||
|
%2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 1
|
||||||
|
call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check that alloca arguments are not aligned if the offset is unknown
|
||||||
|
define void @f6(i8* %dest, i32 %n, i32 %i) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: f6
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #25}}
|
||||||
|
; CHECK-IOS: memmove
|
||||||
|
; CHECK-DARWIN: memmove
|
||||||
|
; CHECK-EABI: __aeabi_memmove
|
||||||
|
%arr0 = alloca [13 x i8], align 1
|
||||||
|
%0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 %i
|
||||||
|
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #(10|14)}}
|
||||||
|
; CHECK-IOS: memcpy
|
||||||
|
; CHECK-DARWIN: memcpy
|
||||||
|
; CHECK-EABI: __aeabi_memcpy
|
||||||
|
%arr1 = alloca [13 x i8], align 1
|
||||||
|
%1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 %i
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #(1|5)}}
|
||||||
|
; CHECK-IOS: mov r1, #0
|
||||||
|
; CHECK-IOS: memset
|
||||||
|
; CHECK-DARWIN: movs r1, #0
|
||||||
|
; CHECK-DARWIN: memset
|
||||||
|
; CHECK-EABI: mov r2, #0
|
||||||
|
; CHECK-EABI: __aeabi_memset
|
||||||
|
%arr2 = alloca [13 x i8], align 1
|
||||||
|
%2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 %i
|
||||||
|
call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check that alloca arguments are not aligned if the GEP is not inbounds
|
||||||
|
define void @f7(i8* %dest, i32 %n) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: f7
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}}
|
||||||
|
; CHECK-IOS: memmove
|
||||||
|
; CHECK-DARWIN: memmove
|
||||||
|
; CHECK-EABI: __aeabi_memmove
|
||||||
|
%arr0 = alloca [13 x i8], align 1
|
||||||
|
%0 = getelementptr [13 x i8], [13 x i8]* %arr0, i32 0, i32 4
|
||||||
|
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #(10|14)}}
|
||||||
|
; CHECK-IOS: memcpy
|
||||||
|
; CHECK-DARWIN: memcpy
|
||||||
|
; CHECK-EABI: __aeabi_memcpy
|
||||||
|
%arr1 = alloca [13 x i8], align 1
|
||||||
|
%1 = getelementptr [13 x i8], [13 x i8]* %arr1, i32 0, i32 4
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #(1|5)}}
|
||||||
|
; CHECK-IOS: mov r1, #0
|
||||||
|
; CHECK-IOS: memset
|
||||||
|
; CHECK-DARWIN: movs r1, #0
|
||||||
|
; CHECK-DARWIN: memset
|
||||||
|
; CHECK-EABI: mov r2, #0
|
||||||
|
; CHECK-EABI: __aeabi_memset
|
||||||
|
%arr2 = alloca [13 x i8], align 1
|
||||||
|
%2 = getelementptr [13 x i8], [13 x i8]* %arr2, i32 0, i32 4
|
||||||
|
call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
; Check that alloca arguments are not aligned when the offset is past the end of the allocation
|
||||||
|
define void @f8(i8* %dest, i32 %n) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: f8
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #27|sub(.w)? r., r7, #21}}
|
||||||
|
; CHECK-IOS: memmove
|
||||||
|
; CHECK-DARWIN: memmove
|
||||||
|
; CHECK-EABI: __aeabi_memmove
|
||||||
|
%arr0 = alloca [13 x i8], align 1
|
||||||
|
%0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 16
|
||||||
|
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #(10|14)}}
|
||||||
|
; CHECK-IOS: memcpy
|
||||||
|
; CHECK-DARWIN: memcpy
|
||||||
|
; CHECK-EABI: __aeabi_memcpy
|
||||||
|
%arr1 = alloca [13 x i8], align 1
|
||||||
|
%1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 16
|
||||||
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
; CHECK: {{add(.w)? r., sp, #(1|5)}}
|
||||||
|
; CHECK-IOS: mov r1, #0
|
||||||
|
; CHECK-IOS: memset
|
||||||
|
; CHECK-DARWIN: movs r1, #0
|
||||||
|
; CHECK-DARWIN: memset
|
||||||
|
; CHECK-EABI: mov r2, #0
|
||||||
|
; CHECK-EABI: __aeabi_memset
|
||||||
|
%arr2 = alloca [13 x i8], align 1
|
||||||
|
%2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 16
|
||||||
|
call void @llvm.memset.p0i8.i32(i8* %2, i8 0, i32 %n, i32 0, i1 false)
|
||||||
|
|
||||||
|
unreachable
|
||||||
}
|
}
|
||||||
|
|
||||||
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
|
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
|
||||||
|
@ -17,7 +17,7 @@ entry:
|
|||||||
; CHECK: add.w r1, r0, #10
|
; CHECK: add.w r1, r0, #10
|
||||||
; CHECK: vmov.i32 {{q[0-9]+}}, #0x0
|
; CHECK: vmov.i32 {{q[0-9]+}}, #0x0
|
||||||
; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
|
; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
|
||||||
; CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
|
; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
|
||||||
%buf = alloca [26 x i8], align 1
|
%buf = alloca [26 x i8], align 1
|
||||||
%0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
|
%0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0
|
||||||
call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
|
call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user