Enable unaligned memory access on PPC for scalar types
Unaligned access is supported on PPC for non-vector types, and is generally more efficient than manually expanding the loads and stores. A few of the existing test cases were using expanded unaligned loads and stores to test other features (like load/store with update), and for these test cases, unaligned access remains disabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177160 91177308-0d34-0410-b5e6-96231b3b80d8
parent c0d8dc0eb6
commit 2d37f7b979
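To illustrate what the commit message means by "manually expanding the loads and stores", here is a small standalone C++ sketch (an illustration, not code from this patch; the function names are hypothetical) contrasting byte-wise expansion with the single unaligned access the hardware can perform directly on a big-endian target like PPC64:

#include <cstdint>
#include <cstring>

// Manual expansion: four aligned byte loads recombined with shifts/ors.
// This is the kind of code the backend must emit when it reports that
// unaligned accesses are not allowed (big-endian byte order, as on PPC).
uint32_t load32_expanded(const uint8_t *p) {
  return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
         (uint32_t(p[2]) << 8) | uint32_t(p[3]);
}

// Direct access: a single (possibly unaligned) 32-bit load, e.g. one lwz
// on PPC, which the hardware handles even at unaligned addresses.
uint32_t load32_direct(const void *p) {
  uint32_t v;
  std::memcpy(&v, p, sizeof v);  // typically lowered to one load
  return v;
}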
@@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
 
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
 static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
   if (TM.getSubtargetImpl()->isDarwin())
     return new TargetLoweringObjectFileMachO();
@@ -6851,6 +6854,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
   }
 }
 
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+                                                      bool *Fast) const {
+  if (DisablePPCUnaligned)
+    return false;
+
+  // PowerPC supports unaligned memory access for simple non-vector types.
+  // Although accessing unaligned addresses is not as efficient as accessing
+  // aligned addresses, it is generally more efficient than manual expansion,
+  // and generally only traps for software emulation when crossing page
+  // boundaries.
+
+  if (!VT.isSimple())
+    return false;
+
+  if (VT.getSimpleVT().isVector())
+    return false;
+
+  if (VT == MVT::ppcf128)
+    return false;
+
+  if (Fast)
+    *Fast = true;
+
+  return true;
+}
+
 /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
 /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
 /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
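As a companion to the hunk above, here is a minimal standalone model (simplified stand-ins by assumption, not LLVM's real EVT/SelectionDAG API) of how a legalizer-style caller consumes this hook:

#include <cstdio>

// Minimal stand-ins for the real type queries used by the patch.
struct TypeInfo {
  bool Simple;   // corresponds to EVT::isSimple()
  bool Vector;   // corresponds to MVT::isVector()
  bool PPCF128;  // corresponds to VT == MVT::ppcf128
};

bool DisablePPCUnaligned = false;  // models the new -disable-ppc-unaligned flag

// Mirrors the checks in the new PPCTargetLowering::allowsUnalignedMemoryAccesses.
bool allowsUnalignedMemoryAccesses(const TypeInfo &VT, bool *Fast = nullptr) {
  if (DisablePPCUnaligned) return false;
  if (!VT.Simple) return false;    // extended (non-simple) types are rejected
  if (VT.Vector) return false;     // vector (Altivec) accesses must stay aligned
  if (VT.PPCF128) return false;    // ppcf128 is handled specially
  if (Fast) *Fast = true;          // when legal, the access is also fast
  return true;
}

// A legalizer-style caller: emit the access directly when allowed,
// otherwise fall back to manual byte-wise expansion.
void legalizeUnalignedLoad(const TypeInfo &VT) {
  if (allowsUnalignedMemoryAccesses(VT))
    std::puts("emit single (possibly unaligned) load");
  else
    std::puts("expand into aligned partial loads + shifts/ors");
}

int main() {
  legalizeUnalignedLoad({true, false, false});  // i32: direct load
  legalizeUnalignedLoad({true, true, false});   // v4f32: expanded
}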
@@ -449,6 +449,10 @@ namespace llvm {
                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                             MachineFunction &MF) const;
 
+    /// Is unaligned memory access allowed for the given type, and is it fast
+    /// relative to software emulation.
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
+
     /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
     /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
     /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
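The "is it fast" out-parameter in the declaration above matters to callers such as memory-op lowering; a hedged sketch of that pattern (hypothetical names, not this patch's code):

#include <cstddef>

// Hypothetical client of the Fast out-parameter: lowering picks a wide
// chunk size only when the target reports that unaligned accesses of that
// width are both legal and fast.
struct TLIModel {
  bool (*AllowsUnaligned)(int BitWidth, bool *Fast);
};

std::size_t chooseCopyChunk(const TLIModel &TLI) {
  bool Fast = false;
  if (TLI.AllowsUnaligned(64, &Fast) && Fast)
    return 8;  // copy in doubleword chunks even from unaligned pointers
  return 1;    // otherwise stay byte-wise
}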
@@ -1,6 +1,6 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
 
 define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
 entry:
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -disable-ppc-unaligned < %s | FileCheck %s
 
 ; FIXME: The code generation for packed structs is very poor because the
 ; PowerPC target wrongly rejects all unaligned loads. This test case will
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim -disable-ppc-unaligned < %s | FileCheck %s
 
 ; FIXME: The code generation for packed structs is very poor because the
 ; PowerPC target wrongly rejects all unaligned loads. This test case will
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
test/CodeGen/PowerPC/unaligned.ll (new file, 73 lines)
@@ -0,0 +1,73 @@
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"

define void @foo1(i16* %p, i16* %r) nounwind {
entry:
  %v = load i16* %p, align 1
  store i16 %v, i16* %r, align 1
  ret void

; CHECK: @foo1
; CHECK: lhz
; CHECK: sth
}

define void @foo2(i32* %p, i32* %r) nounwind {
entry:
  %v = load i32* %p, align 1
  store i32 %v, i32* %r, align 1
  ret void

; CHECK: @foo2
; CHECK: lwz
; CHECK: stw
}

define void @foo3(i64* %p, i64* %r) nounwind {
entry:
  %v = load i64* %p, align 1
  store i64 %v, i64* %r, align 1
  ret void

; CHECK: @foo3
; CHECK: ld
; CHECK: std
}

define void @foo4(float* %p, float* %r) nounwind {
entry:
  %v = load float* %p, align 1
  store float %v, float* %r, align 1
  ret void

; CHECK: @foo4
; CHECK: lfs
; CHECK: stfs
}

define void @foo5(double* %p, double* %r) nounwind {
entry:
  %v = load double* %p, align 1
  store double %v, double* %r, align 1
  ret void

; CHECK: @foo5
; CHECK: lfd
; CHECK: stfd
}

define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
entry:
  %v = load <4 x float>* %p, align 1
  store <4 x float> %v, <4 x float>* %r, align 1
  ret void

; These loads and stores are legalized into aligned loads and stores
; using aligned stack slots.
; CHECK: @foo6
; CHECK: ld
; CHECK: ld
; CHECK: std
; CHECK: std
}
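The @foo6 comment above notes that unaligned vector accesses are legalized through aligned stack slots; a rough C++ analogy (an assumption about the shape of the lowering, not the actual legalizer output):

#include <cstring>

struct V4F32 { float v[4]; };

// Rough analogy for @foo6: the unaligned vector access goes through an
// aligned stack temporary, moved in scalar 64-bit chunks, matching the
// ld/ld/std/std pattern the test expects on a 64-bit PPC target.
void copy_v4f32_unaligned(const V4F32 *p, V4F32 *r) {
  alignas(16) unsigned char tmp[sizeof(V4F32)];  // aligned stack slot
  std::memcpy(tmp, p, sizeof tmp);  // may lower to two 64-bit loads/stores
  std::memcpy(r, tmp, sizeof tmp);
}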