From 174f04eefbc40bffc1de9d0bf230dad0ee8119a6 Mon Sep 17 00:00:00 2001
From: Lang Hames
Date: Thu, 9 Apr 2015 03:40:33 +0000
Subject: [PATCH] [AArch64] Teach AArch64TargetLowering::getOptimalMemOpType to
 consider alignment restrictions when choosing a type for small-memcpy
 inlining in SelectionDAGBuilder.

This ensures that the loads and stores output for the memcpy won't be further
expanded during legalization, which would cause the total number of
instructions for the memcpy to exceed (often significantly) the inlining
thresholds.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234462 91177308-0d34-0410-b5e6-96231b3b80d8
---
Review note (ignored by git-am): the Size >= 4 branch must return MVT::i32,
the type whose alignment/misaligned-access legality it has just checked.
Returning MVT::i64 there would hand back an 8-byte op type for a copy that is
only known to be 4-byte aligned (or only 4..7 bytes long).

 lib/Target/AArch64/AArch64ISelLowering.cpp         | 12 +++++++++++-
 .../AArch64/arm64-misaligned-memcpy-inline.ll      | 14 ++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll

diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 901e5b9a2c4..4fbd5b1ee59 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6664,7 +6664,17 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
        (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
     return MVT::f128;
 
-  return Size >= 8 ? MVT::i64 : MVT::i32;
+  if (Size >= 8 &&
+      (memOpAlign(SrcAlign, DstAlign, 8) ||
+       (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
+    return MVT::i64;
+
+  if (Size >= 4 &&
+      (memOpAlign(SrcAlign, DstAlign, 4) ||
+       (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
+    return MVT::i32;
+
+  return MVT::Other;
 }
 
 // 12-bit optionally shifted immediates are legal for adds.
diff --git a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
new file mode 100644
index 00000000000..e71b824e097
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=arm64-apple-ios -aarch64-strict-align < %s | FileCheck %s
+
+; Small (16-bytes here) unaligned memcpys should stay memcpy calls if
+; strict-alignment is turned on.
+define void @t0(i8* %out, i8* %in) {
+; CHECK-LABEL: t0:
+; CHECK: orr w2, wzr, #0x10
+; CHECK-NEXT: bl _memcpy
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)