mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-30 19:35:54 +00:00
Revise alignment checking/calculation on 256-bit unaligned memory access
- An access is still considered aligned when the specified alignment is larger than the natural alignment.
- The new alignment for the high 128-bit vector should be min(16, alignment), as the pointer is advanced by 16, a power-of-2 offset.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177947 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b4f98ea121
commit
d4584c9e56
@ -16639,11 +16639,10 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||||
unsigned RegSz = RegVT.getSizeInBits();
|
unsigned RegSz = RegVT.getSizeInBits();
|
||||||
|
|
||||||
|
// On Sandybridge unaligned 256bit loads are inefficient.
|
||||||
ISD::LoadExtType Ext = Ld->getExtensionType();
|
ISD::LoadExtType Ext = Ld->getExtensionType();
|
||||||
unsigned Alignment = Ld->getAlignment();
|
unsigned Alignment = Ld->getAlignment();
|
||||||
bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8;
|
bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8;
|
||||||
|
|
||||||
// On Sandybridge unaligned 256bit loads are inefficient.
|
|
||||||
if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
|
if (RegVT.is256BitVector() && !Subtarget->hasInt256() &&
|
||||||
!DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
|
!DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) {
|
||||||
unsigned NumElems = RegVT.getVectorNumElements();
|
unsigned NumElems = RegVT.getVectorNumElements();
|
||||||
@ -16663,7 +16662,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
|
SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr,
|
||||||
Ld->getPointerInfo(), Ld->isVolatile(),
|
Ld->getPointerInfo(), Ld->isVolatile(),
|
||||||
Ld->isNonTemporal(), Ld->isInvariant(),
|
Ld->isNonTemporal(), Ld->isInvariant(),
|
||||||
std::max(Alignment/2U, 1U));
|
std::min(16U, Alignment));
|
||||||
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||||
Load1.getValue(1),
|
Load1.getValue(1),
|
||||||
Load2.getValue(1));
|
Load2.getValue(1));
|
||||||
@ -16834,13 +16833,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
DebugLoc dl = St->getDebugLoc();
|
DebugLoc dl = St->getDebugLoc();
|
||||||
SDValue StoredVal = St->getOperand(1);
|
SDValue StoredVal = St->getOperand(1);
|
||||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||||
unsigned Alignment = St->getAlignment();
|
|
||||||
bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8;
|
|
||||||
|
|
||||||
// If we are saving a concatenation of two XMM registers, perform two stores.
|
// If we are saving a concatenation of two XMM registers, perform two stores.
|
||||||
// On Sandy Bridge, 256-bit memory operations are executed by two
|
// On Sandy Bridge, 256-bit memory operations are executed by two
|
||||||
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
|
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
|
||||||
// memory operation.
|
// memory operation.
|
||||||
|
unsigned Alignment = St->getAlignment();
|
||||||
|
bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8;
|
||||||
if (VT.is256BitVector() && !Subtarget->hasInt256() &&
|
if (VT.is256BitVector() && !Subtarget->hasInt256() &&
|
||||||
StVT == VT && !IsAligned) {
|
StVT == VT && !IsAligned) {
|
||||||
unsigned NumElems = VT.getVectorNumElements();
|
unsigned NumElems = VT.getVectorNumElements();
|
||||||
@ -16860,7 +16859,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
|
SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1,
|
||||||
St->getPointerInfo(), St->isVolatile(),
|
St->getPointerInfo(), St->isVolatile(),
|
||||||
St->isNonTemporal(),
|
St->isNonTemporal(),
|
||||||
std::max(Alignment/2U, 1U));
|
std::min(16U, Alignment));
|
||||||
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
|
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,7 +81,7 @@ define void @storev32i8_01(<32 x i8> %a) nounwind {
|
|||||||
; CHECK: _double_save
|
; CHECK: _double_save
|
||||||
; CHECK-NOT: vinsertf128 $1
|
; CHECK-NOT: vinsertf128 $1
|
||||||
; CHECK-NOT: vinsertf128 $0
|
; CHECK-NOT: vinsertf128 $0
|
||||||
; CHECK: vmovups %xmm
|
; CHECK: vmovaps %xmm
|
||||||
; CHECK: vmovaps %xmm
|
; CHECK: vmovaps %xmm
|
||||||
define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp {
|
define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp {
|
||||||
entry:
|
entry:
|
||||||
@ -127,3 +127,25 @@ define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind {
|
|||||||
store <8 x i32> %x, <8 x i32>* %ret, align 1
|
store <8 x i32> %x, <8 x i32>* %ret, align 1
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK: add4i64a64
|
||||||
|
; CHECK: vmovaps ({{.*}}), %ymm{{.*}}
|
||||||
|
; CHECK: vmovaps %ymm{{.*}}, ({{.*}})
|
||||||
|
define void @add4i64a64(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
|
||||||
|
%b = load <4 x i64>* %bp, align 64
|
||||||
|
%x = add <4 x i64> zeroinitializer, %b
|
||||||
|
store <4 x i64> %x, <4 x i64>* %ret, align 64
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: add4i64a16
|
||||||
|
; CHECK: vmovaps {{.*}}({{.*}}), %xmm{{.*}}
|
||||||
|
; CHECK: vmovaps {{.*}}({{.*}}), %xmm{{.*}}
|
||||||
|
; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}})
|
||||||
|
; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}})
|
||||||
|
define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
|
||||||
|
%b = load <4 x i64>* %bp, align 16
|
||||||
|
%x = add <4 x i64> zeroinitializer, %b
|
||||||
|
store <4 x i64> %x, <4 x i64>* %ret, align 16
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user