mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-08 06:32:24 +00:00
When performing a truncating store, it is sometimes possible to rearrange the
data in-register prior to saving to memory. Reordering the data in-register first avoids having to save multiple scalars to memory individually, so a single regular store suffices. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137238 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
103b8e653c
commit
614061bfb4
@ -12574,14 +12574,91 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
|
||||
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
|
||||
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
StoreSDNode *St = cast<StoreSDNode>(N);
|
||||
EVT VT = St->getValue().getValueType();
|
||||
EVT StVT = St->getMemoryVT();
|
||||
DebugLoc dl = St->getDebugLoc();
|
||||
|
||||
// Optimize trunc store (of multiple scalars) to shuffle and store.
|
||||
// First, pack all of the elements in one place. Next, store to memory
|
||||
// in fewer chunks.
|
||||
if (St->isTruncatingStore() && VT.isVector()) {
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
assert(StVT != VT && "Cannot truncate to the same type");
|
||||
unsigned FromSz = VT.getVectorElementType().getSizeInBits();
|
||||
unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
|
||||
|
||||
// From, To sizes and ElemCount must be pow of two
|
||||
if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue();
|
||||
// We are going to use the original vector elt for storing.
|
||||
// accumulated smaller vector elements must be a multiple of bigger size.
|
||||
if (0 != (NumElems * ToSz) % FromSz) return SDValue();
|
||||
unsigned SizeRatio = FromSz / ToSz;
|
||||
|
||||
assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
|
||||
|
||||
// Create a type on which we perform the shuffle
|
||||
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
|
||||
StVT.getScalarType(), NumElems*SizeRatio);
|
||||
|
||||
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
|
||||
|
||||
SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, St->getValue());
|
||||
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
|
||||
for (unsigned i = 0; i < NumElems; i++ ) ShuffleVec[i] = i * SizeRatio;
|
||||
|
||||
// Can't shuffle using an illegal type
|
||||
if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
|
||||
|
||||
SDValue Shuff = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
|
||||
DAG.getUNDEF(WideVec.getValueType()),
|
||||
ShuffleVec.data());
|
||||
// At this point all of the data is stored at the bottom of the
|
||||
// register. We now need to save it to mem.
|
||||
|
||||
// Find the largest store unit
|
||||
MVT StoreType = MVT::i8;
|
||||
for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
|
||||
tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
|
||||
MVT Tp = (MVT::SimpleValueType)tp;
|
||||
if (TLI.isTypeLegal(Tp) && StoreType.getSizeInBits() < NumElems * ToSz)
|
||||
StoreType = Tp;
|
||||
}
|
||||
|
||||
// Bitcast the original vector into a vector of store-size units
|
||||
EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
|
||||
StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
|
||||
assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
|
||||
SDValue ShuffWide = DAG.getNode(ISD::BITCAST, dl, StoreVecVT, Shuff);
|
||||
SmallVector<SDValue, 8> Chains;
|
||||
SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
|
||||
TLI.getPointerTy());
|
||||
SDValue Ptr = St->getBasePtr();
|
||||
|
||||
// Perform one or more big stores into memory.
|
||||
for (unsigned i = 0; i < (ToSz*NumElems)/StoreType.getSizeInBits() ; i++) {
|
||||
SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
|
||||
StoreType, ShuffWide,
|
||||
DAG.getIntPtrConstant(i));
|
||||
SDValue Ch = DAG.getStore(St->getChain(), dl, SubVec, Ptr,
|
||||
St->getPointerInfo(), St->isVolatile(),
|
||||
St->isNonTemporal(), St->getAlignment());
|
||||
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
|
||||
Chains.push_back(Ch);
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
|
||||
Chains.size());
|
||||
}
|
||||
|
||||
|
||||
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
|
||||
// the FP state in cases where an emms may be missing.
|
||||
// A preferable solution to the general problem is to figure out the right
|
||||
// places to insert EMMS. This qualifies as a quick hack.
|
||||
|
||||
// Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
|
||||
StoreSDNode *St = cast<StoreSDNode>(N);
|
||||
EVT VT = St->getValue().getValueType();
|
||||
if (VT.getSizeInBits() != 64)
|
||||
return SDValue();
|
||||
|
||||
|
39
test/CodeGen/X86/opt-shuff-tstore.ll
Normal file
39
test/CodeGen/X86/opt-shuff-tstore.ll
Normal file
@ -0,0 +1,39 @@
|
||||
; RUN: llc -mcpu=corei7 < %s -o - -promote-elements -mattr=+sse2,+sse41 | FileCheck %s
|
||||
|
||||
; CHECK: func_4_8
|
||||
; A single memory write
|
||||
; CHECK: movd
|
||||
; CHECK-NEXT: ret
|
||||
; Adds a constant vector to %param and stores the <4 x i8> result through %p.
; The payload is 4 x 8 = 32 bits, which lines up with the single movd followed
; by ret that the expectations above require.
; NOTE(review): presumably the -promote-elements flag on the RUN line makes
; this a truncating vector store - confirm against the lowering code.
define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) {
|
||||
%r = add <4 x i8> %param, <i8 1, i8 2, i8 3, i8 4>
|
||||
store <4 x i8> %r, <4 x i8>* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: func_4_16
|
||||
; CHECK: movq
|
||||
; CHECK-NEXT: ret
|
||||
; Adds a constant vector to %param and stores the <4 x i16> result through %p.
; The payload is 4 x 16 = 64 bits, which lines up with the single movq followed
; by ret that the expectations above require.
; NOTE(review): presumably the -promote-elements flag on the RUN line makes
; this a truncating vector store - confirm against the lowering code.
define void @func_4_16(<4 x i16> %param, <4 x i16>* %p) {
|
||||
%r = add <4 x i16> %param, <i16 1, i16 2, i16 3, i16 4>
|
||||
store <4 x i16> %r, <4 x i16>* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: func_8_8
|
||||
; CHECK: movq
|
||||
; CHECK-NEXT: ret
|
||||
; Adds a constant vector to %param and stores the <8 x i8> result through %p.
; The payload is 8 x 8 = 64 bits, which lines up with the single movq followed
; by ret that the expectations above require.
; NOTE(review): presumably the -promote-elements flag on the RUN line makes
; this a truncating vector store - confirm against the lowering code.
define void @func_8_8(<8 x i8> %param, <8 x i8>* %p) {
|
||||
%r = add <8 x i8> %param, <i8 1, i8 2, i8 3, i8 4, i8 1, i8 2, i8 3, i8 4>
|
||||
store <8 x i8> %r, <8 x i8>* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: func_2_32
|
||||
; CHECK: movq
|
||||
; CHECK-NEXT: ret
|
||||
; Adds a constant vector to %param and stores the <2 x i32> result through %p.
; The payload is 2 x 32 = 64 bits, which lines up with the single movq followed
; by ret that the expectations above require.
; NOTE(review): presumably the -promote-elements flag on the RUN line makes
; this a truncating vector store - confirm against the lowering code.
define void @func_2_32(<2 x i32> %param, <2 x i32>* %p) {
|
||||
%r = add <2 x i32> %param, <i32 1, i32 2>
|
||||
store <2 x i32> %r, <2 x i32>* %p
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user