Enable unaligned memory access on PPC for scalar types

Unaligned access is supported on PPC for non-vector types, and is generally
more efficient than manually expanding the loads and stores.

A few of the existing test cases were using expanded unaligned loads and stores
to test other features (like load/store with update), and for these test cases,
unaligned access remains disabled.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177160 91177308-0d34-0410-b5e6-96231b3b80d8
Hal Finkel 2013-03-15 15:27:13 +00:00
parent c0d8dc0eb6
commit 2d37f7b979
7 changed files with 110 additions and 4 deletions
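
For context on the trade-off the commit message describes: "manually expanding" an unaligned load means legalizing it into smaller aligned accesses that are merged back together with shifts. A minimal C++ sketch of the two strategies (illustrative only, not LLVM code; big-endian merge order to match the PPC64 tests below):

    #include <cstdint>
    #include <cstring>

    // Roughly what the expanded form costs: four byte loads merged with
    // shifts and ors (big-endian byte order, as on PPC64).
    std::uint32_t load_u32_expanded(const std::uint8_t *p) {
      return (std::uint32_t(p[0]) << 24) | (std::uint32_t(p[1]) << 16) |
             (std::uint32_t(p[2]) << 8)  |  std::uint32_t(p[3]);
    }

    // With unaligned access allowed, the same access can select to a single
    // load (lwz); memcpy is the portable way to express it in C++.
    std::uint32_t load_u32_direct(const std::uint8_t *p) {
      std::uint32_t v;
      std::memcpy(&v, p, sizeof v);
      return v;
    }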


@@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
   cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
 
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+  cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
 static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
   if (TM.getSubtargetImpl()->isDarwin())
     return new TargetLoweringObjectFileMachO();
@@ -6851,6 +6854,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
   }
 }
 
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+                                                      bool *Fast) const {
+  if (DisablePPCUnaligned)
+    return false;
+
+  // PowerPC supports unaligned memory access for simple non-vector types.
+  // Although accessing unaligned addresses is not as efficient as accessing
+  // aligned addresses, it is generally more efficient than manual expansion,
+  // and generally only traps for software emulation when crossing page
+  // boundaries.
+
+  if (!VT.isSimple())
+    return false;
+
+  if (VT.getSimpleVT().isVector())
+    return false;
+
+  if (VT == MVT::ppcf128)
+    return false;
+
+  if (Fast)
+    *Fast = true;
+
+  return true;
+}
+
 /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
 /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
 /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
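
This override is consulted by target-independent lowering, which expands the access when the target rejects it. A minimal, self-contained sketch of that consultation pattern, using hypothetical stand-in names rather than LLVM's real classes:

    #include <cstdio>

    // Stand-in for TargetLowering; names and types are illustrative only.
    struct TargetLoweringSketch {
      // Conservative base-class default: targets must opt in.
      virtual bool allowsUnalignedMemoryAccesses(int VT, bool *Fast) const {
        return false;
      }
      virtual ~TargetLoweringSketch() {}
    };

    struct PPCLoweringSketch : TargetLoweringSketch {
      bool allowsUnalignedMemoryAccesses(int VT, bool *Fast) const override {
        if (Fast)
          *Fast = true; // scalar unaligned accesses are cheap on PPC
        return true;    // (the real override also filters vectors/ppcf128)
      }
    };

    // How a legalizer-like caller decides between one wide access and an
    // expansion into smaller aligned operations.
    void lowerLoad(const TargetLoweringSketch &TLI, int VT) {
      bool Fast = false;
      if (TLI.allowsUnalignedMemoryAccesses(VT, &Fast))
        std::puts(Fast ? "emit one wide load" : "emit one wide load (slow)");
      else
        std::puts("expand into aligned partial loads plus shifts");
    }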


@@ -449,6 +449,10 @@ namespace llvm {
                         bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                         MachineFunction &MF) const;
 
+    /// Is unaligned memory access allowed for the given type, and is it fast
+    /// relative to software emulation.
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
     /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
     /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
     /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
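
The Fast out-parameter defaults to null, so existing callers that only check legality are unaffected; callers that also care about speed can pass a flag. A hypothetical helper showing that intended usage (TLI_t stands for any type exposing this hook; not an LLVM API):

    // Prefer a wide unaligned access only when the target reports it is
    // both legal and fast; otherwise fall back to narrower aligned ops.
    template <typename TLI_t>
    bool preferWideUnalignedAccess(const TLI_t &TLI, int VT) {
      bool Fast = false;
      return TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast;
    }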


@@ -1,6 +1,6 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
 
 define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
 entry:


@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -disable-ppc-unaligned < %s | FileCheck %s
 
 ; FIXME: The code generation for packed structs is very poor because the
 ; PowerPC target wrongly rejects all unaligned loads. This test case will


@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -disable-ppc-unaligned < %s | FileCheck %s
 
 ; FIXME: The code generation for packed structs is very poor because the
 ; PowerPC target wrongly rejects all unaligned loads. This test case will


@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 


@@ -0,0 +1,73 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+
+define void @foo1(i16* %p, i16* %r) nounwind {
+entry:
+  %v = load i16* %p, align 1
+  store i16 %v, i16* %r, align 1
+  ret void
+
+; CHECK: @foo1
+; CHECK: lhz
+; CHECK: sth
+}
+
+define void @foo2(i32* %p, i32* %r) nounwind {
+entry:
+  %v = load i32* %p, align 1
+  store i32 %v, i32* %r, align 1
+  ret void
+
+; CHECK: @foo2
+; CHECK: lwz
+; CHECK: stw
+}
+
+define void @foo3(i64* %p, i64* %r) nounwind {
+entry:
+  %v = load i64* %p, align 1
+  store i64 %v, i64* %r, align 1
+  ret void
+
+; CHECK: @foo3
+; CHECK: ld
+; CHECK: std
+}
+
+define void @foo4(float* %p, float* %r) nounwind {
+entry:
+  %v = load float* %p, align 1
+  store float %v, float* %r, align 1
+  ret void
+
+; CHECK: @foo4
+; CHECK: lfs
+; CHECK: stfs
+}
+
+define void @foo5(double* %p, double* %r) nounwind {
+entry:
+  %v = load double* %p, align 1
+  store double %v, double* %r, align 1
+  ret void
+
+; CHECK: @foo5
+; CHECK: lfd
+; CHECK: stfd
+}
+
+define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
+entry:
+  %v = load <4 x float>* %p, align 1
+  store <4 x float> %v, <4 x float>* %r, align 1
+  ret void
+
+; These loads and stores are legalized into aligned loads and stores
+; using aligned stack slots.
+; CHECK: @foo6
+; CHECK: ld
+; CHECK: ld
+; CHECK: std
+; CHECK: std
+}