mirror of https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-26 23:24:34 +00:00

Improved widening loads by adding support for wider loads if the alignment
allows.  Fixed a bug where we didn't use a vector load/store for PR5626.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94338 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
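In outline: when the legalizer widens a load of an awkward vector type (say <3 x i32>, 96 bits) to a legal one (<4 x i32>, 128 bits), it may now use a load wider than the bits that remain, as long as the alignment guarantees the extra bytes are readable. A standalone sketch of that chunking decision (plain C++, not LLVM code; the legal-width list and the inputs are illustrative assumptions):

#include <cstdio>
#include <vector>

// Chop a load of `width` bits into legal chunk widths, widest first.
// If `align` (in bits) covers a wider legal type, over-reading up to
// `extra` bits past the value is allowed -- those bits belong to the
// wider vector type the value is being widened to anyway.
std::vector<int> chop(int width, int align, int extra) {
  const int Legal[] = {128, 64, 32, 16, 8};   // assumed legal widths
  std::vector<int> chunks;
  while (width > 0)
    for (int w : Legal)
      if (w <= width || (w <= align && w <= width + extra)) {
        chunks.push_back(w);
        width -= w;
        break;
      }
  return chunks;
}

int main() {
  // <3 x i32> = 96 bits, widened to <4 x i32> = 128 bits (extra = 32).
  for (int w : chop(96, 128, 32)) printf("aligned:   load %d bits\n", w);
  for (int w : chop(96, 32, 32))  printf("unaligned: load %d bits\n", w);
  return 0;
}

With 16-byte alignment the whole value comes in as one 128-bit load (a single movaps in the tests below); otherwise it is chopped into 64- and 32-bit pieces.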
@@ -1533,10 +1533,10 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
     Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);

     // If EltVT smaller than OpVT, only store the bits necessary.
-    if (EltVT.bitsLT(OpVT))
+    if (!OpVT.isVector() && EltVT.bitsLT(OpVT)) {
       Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
                                          Node->getOperand(i), Idx, SV, Offset,
                                          EltVT));
-    else
+    } else
       Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
                                     Node->getOperand(i), Idx, SV, Offset));
   }
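The hunk above guards the truncating-store path in ExpandVectorBuildThroughStack: a BUILD_VECTOR operand is a scalar that may be live in a wider register, so only its low EltVT bits are stored, but a CONCAT_VECTORS operand is itself a vector and must be stored whole. A minimal stand-in for that decision (illustrative type model, not LLVM's EVT):

#include <cstdio>

struct Ty { bool isVec; int bits; };

// Decide how ExpandVectorBuildThroughStack writes one operand to its slot.
const char *pickStore(Ty op, Ty elt) {
  if (!op.isVec && elt.bits < op.bits)
    return "truncstore";  // scalar operand promoted in registers
  return "store";         // sub-vector operand from CONCAT_VECTORS
}

int main() {
  Ty i32{false, 32}, i8{false, 8}, v4i32{true, 128};
  printf("BUILD_VECTOR i8 elt from i32 op: %s\n", pickStore(i32, i8));
  printf("CONCAT_VECTORS v4i32 op:         %s\n", pickStore(v4i32, v4i32));
  return 0;
}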
@@ -633,43 +633,33 @@ private:
   // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
   //===--------------------------------------------------------------------===//

-  /// Helper genWidenVectorLoads - Helper function to generate a set of
+  /// Helper GenWidenVectorLoads - Helper function to generate a set of
   /// loads to load a vector with a resulting wider type. It takes
-  /// ExtType:    Extension type
-  /// LdChain:    list of chains for the load we have generated.
-  /// Chain:      incoming chain for the ld vector.
-  /// BasePtr:    base pointer to load from.
-  /// SV:         memory disambiguation source value.
-  /// SVOffset:   memory disambiguation offset.
-  /// Alignment:  alignment of the memory.
-  /// isVolatile: volatile load.
-  /// LdWidth:    width of memory that we want to load.
-  /// ResType:    the wider result type for the resulting vector.
-  /// dl:         DebugLoc to be applied to new nodes
-  SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, SDValue Chain,
-                              SDValue BasePtr, const Value *SV,
-                              int SVOffset, unsigned Alignment,
-                              bool isVolatile, unsigned LdWidth,
-                              EVT ResType, DebugLoc dl);
+  /// LdChain: list of chains for the load to be generated.
+  /// Ld:      load to widen
+  SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+                              LoadSDNode *LD);
+
+  /// GenWidenVectorExtLoads - Helper function to generate a set of extension
+  /// loads to load a vector with a resulting wider type. It takes
+  /// LdChain: list of chains for the load to be generated.
+  /// Ld:      load to widen
+  /// ExtType: extension element type
+  SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+                                 LoadSDNode *LD, ISD::LoadExtType ExtType);

   /// Helper genWidenVectorStores - Helper function to generate a set of
   /// stores to store a widen vector into non widen memory
-  /// It takes
   /// StChain: list of chains for the stores we have generated
-  /// Chain:      incoming chain for the ld vector
-  /// BasePtr:    base pointer to load from
-  /// SV:         memory disambiguation source value
-  /// SVOffset:   memory disambiguation offset
-  /// Alignment:  alignment of the memory
-  /// isVolatile: volatile store
-  /// ValOp:      value to store
-  /// StWidth:    width of memory that we want to store
-  /// dl:         DebugLoc to be applied to new nodes
-  void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, SDValue Chain,
-                            SDValue BasePtr, const Value *SV,
-                            int SVOffset, unsigned Alignment,
-                            bool isVolatile, SDValue ValOp,
-                            unsigned StWidth, DebugLoc dl);
+  /// ST:      store of a widen value
+  void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, StoreSDNode *ST);
+
+  /// Helper genWidenVectorTruncStores - Helper function to generate a set of
+  /// stores to store a truncate widen vector into non widen memory
+  /// StChain: list of chains for the stores we have generated
+  /// ST:      store of a widen value
+  void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+                                 StoreSDNode *ST);

   /// Modifies a vector input (widen or narrows) to a vector of NVT.  The
   /// input vector must have the same element type as NVT.
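The reworked interface above moves all per-operand bookkeeping (chain, base pointer, offset, alignment, volatility) behind the LoadSDNode/StoreSDNode arguments and splits the work four ways. A rough stand-in for the dispatch, under the assumption stated in the comments that extending loads and truncating stores are unrolled per element while ordinary ones are chopped into wide pieces (plain C++, names only mirror the declarations above):

#include <cstdio>

enum Kind { PLAIN, EXTENDING };   // load/store flavour (illustrative)

const char *widenLoadStrategy(Kind k) {
  // Extending loads change the element width in flight, so the
  // chop-and-bitcast trick cannot be used; they are unrolled instead.
  return k == EXTENDING ? "GenWidenVectorExtLoads: one ext-load per element"
                        : "GenWidenVectorLoads: widest legal chunks";
}

const char *widenStoreStrategy(Kind k) {
  return k == EXTENDING ? "GenWidenVectorTruncStores: one trunc-store per elt"
                        : "GenWidenVectorStores: widest legal chunks";
}

int main() {
  printf("%s\n%s\n", widenLoadStrategy(PLAIN), widenLoadStrategy(EXTENDING));
  printf("%s\n%s\n", widenStoreStrategy(PLAIN), widenStoreStrategy(EXTENDING));
  return 0;
}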
@@ -1655,68 +1655,24 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {

 SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
   LoadSDNode *LD = cast<LoadSDNode>(N);
-  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
-  EVT LdVT    = LD->getMemoryVT();
-  DebugLoc dl = N->getDebugLoc();
-  assert(LdVT.isVector() && WidenVT.isVector());
-
-  // Load information
-  SDValue   Chain = LD->getChain();
-  SDValue   BasePtr = LD->getBasePtr();
-  int       SVOffset = LD->getSrcValueOffset();
-  unsigned  Align = LD->getAlignment();
-  bool      isVolatile = LD->isVolatile();
-  const Value *SV = LD->getSrcValue();
   ISD::LoadExtType ExtType = LD->getExtensionType();

   SDValue Result;
   SmallVector<SDValue, 16> LdChain;  // Chain for the series of load
-  if (ExtType != ISD::NON_EXTLOAD) {
-    // For extension loads, we can not play the tricks of chopping legal
-    // vector types and bit cast it to the right type. Instead, we unroll
-    // the load and build a vector.
-    EVT EltVT = WidenVT.getVectorElementType();
-    EVT LdEltVT = LdVT.getVectorElementType();
-    unsigned NumElts = LdVT.getVectorNumElements();
-
-    // Load each element and widen
-    unsigned WidenNumElts = WidenVT.getVectorNumElements();
-    SmallVector<SDValue, 16> Ops(WidenNumElts);
-    unsigned Increment = LdEltVT.getSizeInBits() / 8;
-    Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
-                            LdEltVT, isVolatile, Align);
-    LdChain.push_back(Ops[0].getValue(1));
-    unsigned i = 0, Offset = Increment;
-    for (i=1; i < NumElts; ++i, Offset += Increment) {
-      SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
-                                       BasePtr, DAG.getIntPtrConstant(Offset));
-      Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
-                              SVOffset + Offset, LdEltVT, isVolatile, Align);
-      LdChain.push_back(Ops[i].getValue(1));
-    }
-
-    // Fill the rest with undefs
-    SDValue UndefVal = DAG.getUNDEF(EltVT);
-    for (; i != WidenNumElts; ++i)
-      Ops[i] = UndefVal;
-
-    Result = DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
-  } else {
-    assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
-    unsigned int LdWidth = LdVT.getSizeInBits();
-    Result = GenWidenVectorLoads(LdChain, Chain, BasePtr, SV, SVOffset,
-                                 Align, isVolatile, LdWidth, WidenVT, dl);
-  }
-
-  // If we generate a single load, we can use that for the chain.  Otherwise,
-  // build a factor node to remember the multiple loads are independent and
-  // chain to that.
-  SDValue NewChain;
-  if (LdChain.size() == 1)
-    NewChain = LdChain[0];
-  else
-    NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LdChain[0],
-                           LdChain.size());
+  if (ExtType != ISD::NON_EXTLOAD)
+    Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+  else
+    Result = GenWidenVectorLoads(LdChain, LD);
+
+  // If we generate a single load, we can use that for the chain.  Otherwise,
+  // build a factor node to remember the multiple loads are independent and
+  // chain to that.
+  SDValue NewChain;
+  if (LdChain.size() == 1)
+    NewChain = LdChain[0];
+  else
+    NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
+                           &LdChain[0], LdChain.size());

   // Modified the chain - switch anything that used the old chain to use
   // the new one.
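Both WidenVecRes_LOAD above and WidenVecOp_STORE below end the same way: each partial load or store carries its own output chain, and when there is more than one, a TokenFactor node joins them so later users of the original chain are ordered after all the pieces. A standalone model of that merge (plain C++, not LLVM's SDNode machinery; chains are just strings here):

#include <cstdio>
#include <string>
#include <vector>

std::string mergeChains(const std::vector<std::string> &ldChain) {
  if (ldChain.size() == 1)
    return ldChain[0];                 // single load: reuse its chain
  std::string tf = "TokenFactor(";     // else record their independence
  for (size_t i = 0; i < ldChain.size(); ++i)
    tf += (i ? ", " : "") + ldChain[i];
  return tf + ")";
}

int main() {
  printf("%s\n", mergeChains({"ch0"}).c_str());
  printf("%s\n", mergeChains({"ch0", "ch1", "ch2"}).c_str());
  return 0;
}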
@@ -1954,57 +1910,17 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
   // We have to widen the value but we want only to store the original
   // vector type.
   StoreSDNode *ST = cast<StoreSDNode>(N);
-  SDValue  Chain = ST->getChain();
-  SDValue  BasePtr = ST->getBasePtr();
-  const Value *SV = ST->getSrcValue();
-  int      SVOffset = ST->getSrcValueOffset();
-  unsigned Align = ST->getAlignment();
-  bool     isVolatile = ST->isVolatile();
-  SDValue  ValOp = GetWidenedVector(ST->getValue());
-  DebugLoc dl = N->getDebugLoc();
-
-  EVT StVT = ST->getMemoryVT();
-  EVT ValVT = ValOp.getValueType();
-  // It must be true that the widen vector type is bigger than where
-  // we need to store.
-  assert(StVT.isVector() && ValOp.getValueType().isVector());
-  assert(StVT.bitsLT(ValOp.getValueType()));
-
   SmallVector<SDValue, 16> StChain;
-  if (ST->isTruncatingStore()) {
-    // For truncating stores, we can not play the tricks of chopping legal
-    // vector types and bit cast it to the right type. Instead, we unroll
-    // the store.
-    EVT StEltVT  = StVT.getVectorElementType();
-    EVT ValEltVT = ValVT.getVectorElementType();
-    unsigned Increment = ValEltVT.getSizeInBits() / 8;
-    unsigned NumElts = StVT.getVectorNumElements();
-    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
-                              DAG.getIntPtrConstant(0));
-    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
-                                        SVOffset, StEltVT,
-                                        isVolatile, Align));
-    unsigned Offset = Increment;
-    for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
-      SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
-                                       BasePtr, DAG.getIntPtrConstant(Offset));
-      SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
-                                DAG.getIntPtrConstant(0));
-      StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
-                                          SVOffset + Offset, StEltVT,
-                                          isVolatile, MinAlign(Align, Offset)));
-    }
-  }
-  else {
-    assert(StVT.getVectorElementType() == ValVT.getVectorElementType());
-    // Store value
-    GenWidenVectorStores(StChain, Chain, BasePtr, SV, SVOffset,
-                         Align, isVolatile, ValOp, StVT.getSizeInBits(), dl);
-  }
+  if (ST->isTruncatingStore())
+    GenWidenVectorTruncStores(StChain, ST);
+  else
+    GenWidenVectorStores(StChain, ST);
+
   if (StChain.size() == 1)
     return StChain[0];
   else
-    return DAG.getNode(ISD::TokenFactor, dl,
+    return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(),
                        MVT::Other,&StChain[0],StChain.size());
 }
@@ -2012,179 +1928,383 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
 // Vector Widening Utilities
 //===----------------------------------------------------------------------===//

-// Utility function to find a vector type and its associated element
-// type from a preferred width and whose vector type must be the same size
-// as the VecVT.
-//  TLI:   Target lowering used to determine legal types.
-//  Width: Preferred width to store.
-//  VecVT: Vector value type whose size we must match.
-// Returns NewVecVT and NewEltVT - the vector type and its associated
-// element type.
-static void FindAssocWidenVecType(SelectionDAG& DAG,
-                                  const TargetLowering &TLI, unsigned Width,
-                                  EVT VecVT,
-                                  EVT& NewEltVT, EVT& NewVecVT) {
-  unsigned EltWidth = Width + 1;
-  if (TLI.isTypeLegal(VecVT)) {
-    // We start with the preferred width, making it a power of 2 and find a
-    // legal vector type of that width. If not, we reduce it by another power
-    // of 2.  If the incoming type is legal, this process will end, as a
-    // vector of the smallest loadable type should always be legal.
-    do {
-      assert(EltWidth > 0);
-      EltWidth = 1 << Log2_32(EltWidth - 1);
-      NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
-      unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
-      NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts);
-    } while (!TLI.isTypeLegal(NewVecVT) ||
-             VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
-  } else {
-    // The incoming vector type is illegal and is the result of widening
-    // a vector to a power of 2.  In this case, we will use the preferred
-    // width as long as it is a multiple of the incoming vector length.
-    // The legalization process will eventually make this into a legal type
-    // and remove the illegal bit converts (which would turn into stack
-    // converts if they were allowed to exist).
-    do {
-      assert(EltWidth > 0);
-      EltWidth = 1 << Log2_32(EltWidth - 1);
-      NewEltVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
-      unsigned NumElts = VecVT.getSizeInBits() / EltWidth;
-      NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT, NumElts);
-    } while (!TLI.isTypeLegal(NewEltVT) ||
-             VecVT.getSizeInBits() != NewVecVT.getSizeInBits());
-  }
-}
+// Utility function to find the type to chop up a widen vector for load/store
+//  TLI:     Target lowering used to determine legal types.
+//  Width:   Width left need to load/store.
+//  WidenVT: The widen vector type to load to/store from
+//  Align:   If 0, don't allow use of a wider type
+//  WidenEx: If Align is not 0, the amount additional we can load/store from.
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+                       unsigned Width, EVT WidenVT,
+                       unsigned Align = 0, unsigned WidenEx = 0) {
+  EVT WidenEltVT = WidenVT.getVectorElementType();
+  unsigned WidenWidth = WidenVT.getSizeInBits();
+  unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+  unsigned AlignInBits = Align*8;
+
+  // If we have one element to load/store, return it.
+  EVT RetVT = WidenEltVT;
+  if (Width == WidenEltWidth)
+    return RetVT;
+
+  // See if there is larger legal integer than the element type to load/store
+  unsigned VT;
+  for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+       VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+    EVT MemVT((MVT::SimpleValueType) VT);
+    unsigned MemVTWidth = MemVT.getSizeInBits();
+    if (MemVT.getSizeInBits() <= WidenEltWidth)
+      break;
+    if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+        (MemVTWidth <= Width ||
+         (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+      RetVT = MemVT;
+      break;
+    }
+  }
+
+  // See if there is a larger vector type to load/store that has the same
+  // vector element type and is evenly divisible with the WidenVT.
+  for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+       VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+    EVT MemVT = (MVT::SimpleValueType) VT;
+    unsigned MemVTWidth = MemVT.getSizeInBits();
+    if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+        (WidenWidth % MemVTWidth) == 0 &&
+        (MemVTWidth <= Width ||
+         (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+      if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+        return MemVT;
+    }
+  }
+
+  return RetVT;
+}
+
+// Builds a vector type from scalar loads
+//  VecTy: Resulting Vector type
+//  LdOps: Load operators to build a vector type
+//  [Start,End) the list of loads to use.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+                                     SmallVector<SDValue, 16>& LdOps,
+                                     unsigned Start, unsigned End) {
+  DebugLoc dl = LdOps[Start].getDebugLoc();
+  EVT LdTy = LdOps[Start].getValueType();
+  unsigned Width = VecTy.getSizeInBits();
+  unsigned NumElts = Width / LdTy.getSizeInBits();
+  EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
+  unsigned Idx = 1;
+  SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
+
+  for (unsigned i = Start + 1; i != End; ++i) {
+    EVT NewLdTy = LdOps[i].getValueType();
+    if (NewLdTy != LdTy) {
+      NumElts = Width / NewLdTy.getSizeInBits();
+      NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+      // Readjust position and vector position based on new load type
+      Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+      LdTy = NewLdTy;
+    }
+    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+                        DAG.getIntPtrConstant(Idx++));
+  }
+  return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp);
+}
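FindMemType is the heart of the change: it scans legal integer types widest-first, then legal vector types with the matching element type, and accepts a type either because it fits in the bits still to transfer or, when Align is nonzero, because the alignment proves the over-read stays within bytes we may touch (at most WidenEx bits past the value, which the widened type owns anyway). A standalone model with a worked <3 x i32> example (plain C++; the legal-type set is an assumption, where the real code asks TLI.isTypeLegal):

#include <cstdio>

struct MemTy { const char *name; int bits; bool isVec; };

// Candidates ordered widest-first, integers scanned before vectors as in
// the two loops above; all widths are in bits.
MemTy findMemType(int width, int widenWidth, int align, int widenEx) {
  const MemTy Ints[] = {{"i64", 64, false}, {"i32", 32, false},
                        {"i16", 16, false}, {"i8", 8, false}};
  const MemTy Vecs[] = {{"v4i32", 128, true}, {"v2i32", 64, true}};
  MemTy ret = {"i8", 8, false};
  for (MemTy t : Ints)
    if (widenWidth % t.bits == 0 &&
        (t.bits <= width ||
         (align && t.bits <= align && t.bits <= width + widenEx))) {
      ret = t;
      break;
    }
  for (MemTy t : Vecs)
    if (widenWidth % t.bits == 0 &&
        (t.bits <= width ||
         (align && t.bits <= align && t.bits <= width + widenEx)) &&
        (ret.bits < t.bits || t.bits == widenWidth))
      return t;
  return ret;
}

int main() {
  // <3 x i32>: 96 bits left, widened to 128. With 16-byte alignment the
  // answer is v4i32 (over-reads 32 bits the widened value owns anyway).
  printf("aligned:   %s\n", findMemType(96, 128, 128, 32).name);
  // With no usable alignment (as for volatile loads, where LdAlign is
  // forced to 0 below), the widest type that fits is i64.
  printf("unaligned: %s\n", findMemType(96, 128, 0, 32).name);
  return 0;
}

Note the volatile case passes Align == 0 (see LdAlign in GenWidenVectorLoads below), so a volatile load is never widened past its declared size.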
 SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
-                                              SDValue      Chain,
-                                              SDValue      BasePtr,
-                                              const Value *SV,
-                                              int          SVOffset,
-                                              unsigned     Alignment,
-                                              bool         isVolatile,
-                                              unsigned     LdWidth,
-                                              EVT          ResType,
-                                              DebugLoc     dl) {
+                                              LoadSDNode *LD) {
   // The strategy assumes that we can efficiently load powers of two widths.
-  // The routine chops the vector into the largest power of 2 load and
-  // can be inserted into a legal vector and then cast the result into the
-  // vector type we want.  This avoids unnecessary stack converts.
+  // The routine chops the vector into the largest vector loads with the same
+  // element type or scalar loads and then recombines it to the widen vector
+  // type.
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+  unsigned WidenWidth = WidenVT.getSizeInBits();
+  EVT LdVT    = LD->getMemoryVT();
+  DebugLoc dl = LD->getDebugLoc();
+  assert(LdVT.isVector() && WidenVT.isVector());
+  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());

-  // TODO: If the LdWidth is legal, alignment is the same as the LdWidth, and
-  // the load is nonvolatile, we can use a wider load for the value.
+  // Load information
+  SDValue   Chain = LD->getChain();
+  SDValue   BasePtr = LD->getBasePtr();
+  int       SVOffset = LD->getSrcValueOffset();
+  unsigned  Align = LD->getAlignment();
+  bool      isVolatile = LD->isVolatile();
+  const Value *SV = LD->getSrcValue();
+
+  int LdWidth = LdVT.getSizeInBits();
+  int WidthDiff = WidenWidth - LdWidth;          // Difference
+  unsigned LdAlign = (isVolatile) ? 0 : Align;   // Allow wider loads

   // Find the vector type that can load from.
-  EVT NewEltVT, NewVecVT;
-  unsigned NewEltVTWidth;
-  FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT);
-  NewEltVTWidth = NewEltVT.getSizeInBits();
-
-  SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV, SVOffset,
-                             isVolatile, Alignment);
-  SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+  EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+  int NewVTWidth = NewVT.getSizeInBits();
+  SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset,
+                             isVolatile, Align);
   LdChain.push_back(LdOp.getValue(1));

   // Check if we can load the element with one instruction
-  if (LdWidth == NewEltVTWidth) {
-    return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+  if (LdWidth <= NewVTWidth) {
+    if (NewVT.isVector()) {
+      if (NewVT != WidenVT) {
+        assert(WidenWidth % NewVTWidth == 0);
+        unsigned NumConcat = WidenWidth / NewVTWidth;
+        SmallVector<SDValue, 16> ConcatOps(NumConcat);
+        SDValue UndefVal = DAG.getUNDEF(NewVT);
+        ConcatOps[0] = LdOp;
+        for (unsigned i = 1; i != NumConcat; ++i)
+          ConcatOps[i] = UndefVal;
+        return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+                           NumConcat);
+      } else
+        return LdOp;
+    } else {
+      unsigned NumElts = WidenWidth / LdWidth;
+      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+      SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+      return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
+    }
   }

-  unsigned Idx = 1;
-  LdWidth -= NewEltVTWidth;
+  // Load vector by using multiple loads from largest vector to scalar
+  SmallVector<SDValue, 16> LdOps;
+  LdOps.push_back(LdOp);
+
+  LdWidth -= NewVTWidth;
   unsigned Offset = 0;

   while (LdWidth > 0) {
-    unsigned Increment = NewEltVTWidth / 8;
+    unsigned Increment = NewVTWidth / 8;
     Offset += Increment;
     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                           DAG.getIntPtrConstant(Increment));

-    if (LdWidth < NewEltVTWidth) {
-      // Our current type we are using is too large, use a smaller size by
-      // using a smaller power of 2
-      unsigned oNewEltVTWidth = NewEltVTWidth;
-      FindAssocWidenVecType(DAG, TLI, LdWidth, ResType, NewEltVT, NewVecVT);
-      NewEltVTWidth = NewEltVT.getSizeInBits();
-      // Readjust position and vector position based on new load type
-      Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
-      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+    if (LdWidth < NewVTWidth) {
+      // Our current type we are using is too large, find a better size
+      NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+      NewVTWidth = NewVT.getSizeInBits();
     }

-    SDValue LdOp = DAG.getLoad(NewEltVT, dl, Chain, BasePtr, SV,
+    SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV,
                                SVOffset+Offset, isVolatile,
-                               MinAlign(Alignment, Offset));
+                               MinAlign(Align, Increment));
     LdChain.push_back(LdOp.getValue(1));
-    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOp,
-                        DAG.getIntPtrConstant(Idx++));
+    LdOps.push_back(LdOp);

-    LdWidth -= NewEltVTWidth;
+    LdWidth -= NewVTWidth;
   }

-  return DAG.getNode(ISD::BIT_CONVERT, dl, ResType, VecOp);
+  // Build the vector from the loads operations
+  unsigned End = LdOps.size();
+  if (LdOps[0].getValueType().isVector()) {
+    // If the load contains vectors, build the vector using concat vector.
+    // All of the vectors used to loads are power of 2 and the scalar loads
+    // can be combined to make a power of 2 vector.
+    SmallVector<SDValue, 16> ConcatOps(End);
+    int i = End - 1;
+    int Idx = End;
+    EVT LdTy = LdOps[i].getValueType();
+    // First combine the scalar loads to a vector
+    if (!LdTy.isVector()) {
+      for (--i; i >= 0; --i) {
+        LdTy = LdOps[i].getValueType();
+        if (LdTy.isVector())
+          break;
+      }
+      ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+    }
+    ConcatOps[--Idx] = LdOps[i];
+    for (--i; i >= 0; --i) {
+      EVT NewLdTy = LdOps[i].getValueType();
+      if (NewLdTy != LdTy) {
+        // Create a larger vector
+        ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+                                       &ConcatOps[Idx], End - Idx);
+        Idx = End - 1;
+        LdTy = NewLdTy;
+      }
+      ConcatOps[--Idx] = LdOps[i];
+    }
+
+    if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) {
+      // We need to fill the rest with undefs to build the vector
+      unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+      SmallVector<SDValue, 16> WidenOps(NumOps);
+      SDValue UndefVal = DAG.getUNDEF(LdTy);
+      unsigned i = 0;
+      for (; i != End-Idx; ++i)
+        WidenOps[i] = ConcatOps[Idx+i];
+      for (; i != NumOps; ++i)
+        WidenOps[i] = UndefVal;
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
+    } else
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+                         &ConcatOps[Idx], End - Idx);
+  } else // All the loads are scalar loads.
+    return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
 }
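Putting the pieces together for the add7i32 test below: a 16-byte-aligned load of <7 x i32> widened to <8 x i32> has 224 bits to fetch and 32 bits of slack, so the loop above issues two v4i32 loads (the second over-reads into the slack) and concatenates them. A standalone trace (plain C++; the widths are hard-coded from the FindMemType sketch above):

#include <cstdio>

int main() {
  int ldWidth = 224, offset = 0;        // bits left; offset kept in bits
  const int align = 128, widenEx = 32;  // 16-byte alignment, 32 bits slack
  while (ldWidth > 0) {
    int w = 128;                        // try the widest candidate first
    if (!(w <= ldWidth || (w <= align && w <= ldWidth + widenEx)))
      w = 64;                           // fall back (not hit in this trace)
    printf("load %d bits at byte offset %d\n", w, offset / 8);
    ldWidth -= w;
    offset += w;
  }
  puts("CONCAT_VECTORS -> <8 x i32>, low 7 lanes hold the value");
  return 0;
}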
-void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
-                                            SDValue   Chain,
-                                            SDValue   BasePtr,
-                                            const Value *SV,
-                                            int       SVOffset,
-                                            unsigned  Alignment,
-                                            bool      isVolatile,
-                                            SDValue   ValOp,
-                                            unsigned  StWidth,
-                                            DebugLoc  dl) {
-  // Breaks the stores into a series of power of 2 width stores.  For any
-  // width, we convert the vector to the vector of element size that we
-  // want to store.  This avoids requiring a stack convert.
-
-  // Find a width of the element type we can store with
-  EVT WidenVT = ValOp.getValueType();
-  EVT NewEltVT, NewVecVT;
-
-  FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
-  unsigned NewEltVTWidth = NewEltVT.getSizeInBits();
-
-  SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
-  SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp,
-                            DAG.getIntPtrConstant(0));
-  SDValue StOp = DAG.getStore(Chain, dl, EOp, BasePtr, SV, SVOffset,
-                              isVolatile, Alignment);
-  StChain.push_back(StOp);
-
-  // Check if we are done
-  if (StWidth == NewEltVTWidth) {
-    return;
-  }
-
-  unsigned Idx = 1;
-  StWidth -= NewEltVTWidth;
-  unsigned Offset = 0;
-
-  while (StWidth > 0) {
-    unsigned Increment = NewEltVTWidth / 8;
-    Offset += Increment;
-    BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
-                          DAG.getIntPtrConstant(Increment));
-
-    if (StWidth < NewEltVTWidth) {
-      // Our current type we are using is too large, use a smaller size by
-      // using a smaller power of 2
-      unsigned oNewEltVTWidth = NewEltVTWidth;
-      FindAssocWidenVecType(DAG, TLI, StWidth, WidenVT, NewEltVT, NewVecVT);
-      NewEltVTWidth = NewEltVT.getSizeInBits();
-      // Readjust position and vector position based on new load type
-      Idx = Idx * (oNewEltVTWidth/NewEltVTWidth);
-      VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
-    }
-
-    EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewEltVT, VecOp,
-                      DAG.getIntPtrConstant(Idx++));
-    StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
-                                   SVOffset + Offset, isVolatile,
-                                   MinAlign(Alignment, Offset)));
-    StWidth -= NewEltVTWidth;
-  }
-}
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+                                         LoadSDNode *LD,
+                                         ISD::LoadExtType ExtType) {
+  // For extension loads, it may not be more efficient to chop up the vector
+  // and then extend it.  Instead, we unroll the load and build a new vector.
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+  EVT LdVT    = LD->getMemoryVT();
+  DebugLoc dl = LD->getDebugLoc();
+  assert(LdVT.isVector() && WidenVT.isVector());
+
+  // Load information
+  SDValue   Chain = LD->getChain();
+  SDValue   BasePtr = LD->getBasePtr();
+  int       SVOffset = LD->getSrcValueOffset();
+  unsigned  Align = LD->getAlignment();
+  bool      isVolatile = LD->isVolatile();
+  const Value *SV = LD->getSrcValue();
+
+  EVT EltVT = WidenVT.getVectorElementType();
+  EVT LdEltVT = LdVT.getVectorElementType();
+  unsigned NumElts = LdVT.getVectorNumElements();
+
+  // Load each element and widen
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  unsigned Increment = LdEltVT.getSizeInBits() / 8;
+  Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
+                          LdEltVT, isVolatile, Align);
+  LdChain.push_back(Ops[0].getValue(1));
+  unsigned i = 0, Offset = Increment;
+  for (i=1; i < NumElts; ++i, Offset += Increment) {
+    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                     BasePtr, DAG.getIntPtrConstant(Offset));
+    Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
+                            SVOffset + Offset, LdEltVT, isVolatile, Align);
+    LdChain.push_back(Ops[i].getValue(1));
+  }
+
+  // Fill the rest with undefs
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i != WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+}
+
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+                                            StoreSDNode *ST) {
+  // The strategy assumes that we can efficiently store powers of two widths.
+  // The routine chops the vector into the largest vector stores with the same
+  // element type or scalar stores.
+  SDValue  Chain = ST->getChain();
+  SDValue  BasePtr = ST->getBasePtr();
+  const Value *SV = ST->getSrcValue();
+  int      SVOffset = ST->getSrcValueOffset();
+  unsigned Align = ST->getAlignment();
+  bool     isVolatile = ST->isVolatile();
+  SDValue  ValOp = GetWidenedVector(ST->getValue());
+  DebugLoc dl = ST->getDebugLoc();
+
+  EVT StVT = ST->getMemoryVT();
+  unsigned StWidth = StVT.getSizeInBits();
+  EVT ValVT = ValOp.getValueType();
+  unsigned ValWidth = ValVT.getSizeInBits();
+  EVT ValEltVT = ValVT.getVectorElementType();
+  unsigned ValEltWidth = ValEltVT.getSizeInBits();
+  assert(StVT.getVectorElementType() == ValEltVT);
+
+  int Idx = 0;          // current index to store
+  unsigned Offset = 0;  // offset from base to store
+  while (StWidth != 0) {
+    // Find the largest vector type we can store with
+    EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+    unsigned NewVTWidth = NewVT.getSizeInBits();
+    unsigned Increment = NewVTWidth / 8;
+    if (NewVT.isVector()) {
+      unsigned NumVTElts = NewVT.getVectorNumElements();
+      do {
+        SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+                                  DAG.getIntPtrConstant(Idx));
+        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+                                       SVOffset + Offset, isVolatile,
+                                       MinAlign(Align, Offset)));
+        StWidth -= NewVTWidth;
+        Offset += Increment;
+        Idx += NumVTElts;
+        BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                              DAG.getIntPtrConstant(Increment));
+      } while (StWidth != 0 && StWidth >= NewVTWidth);
+    } else {
+      // Cast the vector to the scalar type we can store
+      unsigned NumElts = ValWidth / NewVTWidth;
+      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+      SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+      // Readjust index position based on new vector type
+      Idx = Idx * ValEltWidth / NewVTWidth;
+      do {
+        SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+                                  DAG.getIntPtrConstant(Idx++));
+        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
+                                       SVOffset + Offset, isVolatile,
+                                       MinAlign(Align, Offset)));
+        StWidth -= NewVTWidth;
+        Offset += Increment;
+        BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                              DAG.getIntPtrConstant(Increment));
+      } while (StWidth != 0 && StWidth >= NewVTWidth);
+      // Restore index back to be relative to the original widen element type
+      Idx = Idx * NewVTWidth / ValEltWidth;
+    }
+  }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+                                            StoreSDNode *ST) {
+  // For truncating stores, it may not be more efficient to truncate the
+  // vector and then store it.  Instead, we extract each element and then
+  // store it.
+  SDValue  Chain = ST->getChain();
+  SDValue  BasePtr = ST->getBasePtr();
+  const Value *SV = ST->getSrcValue();
+  int      SVOffset = ST->getSrcValueOffset();
+  unsigned Align = ST->getAlignment();
+  bool     isVolatile = ST->isVolatile();
+  SDValue  ValOp = GetWidenedVector(ST->getValue());
+  DebugLoc dl = ST->getDebugLoc();
+
+  EVT StVT = ST->getMemoryVT();
+  EVT ValVT = ValOp.getValueType();
+
+  // It must be true that the widen vector type is bigger than where
+  // we need to store.
+  assert(StVT.isVector() && ValOp.getValueType().isVector());
+  assert(StVT.bitsLT(ValOp.getValueType()));
+
+  // For truncating stores, we can not play the tricks of chopping legal
+  // vector types and bit cast it to the right type.  Instead, we unroll
+  // the store.
+  EVT StEltVT  = StVT.getVectorElementType();
+  EVT ValEltVT = ValVT.getVectorElementType();
+  unsigned Increment = ValEltVT.getSizeInBits() / 8;
+  unsigned NumElts = StVT.getVectorNumElements();
+  SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                            DAG.getIntPtrConstant(0));
+  StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
+                                      SVOffset, StEltVT,
+                                      isVolatile, Align));
+  unsigned Offset = Increment;
+  for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
+    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                     BasePtr, DAG.getIntPtrConstant(Offset));
+    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                              DAG.getIntPtrConstant(0));
+    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
+                                        SVOffset + Offset, StEltVT,
+                                        isVolatile, MinAlign(Align, Offset)));
+  }
+}
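Stores get no alignment slack: GenWidenVectorStores calls FindMemType without Align/WidenEx, since writing past the value would clobber memory, and it re-scales Idx whenever it switches between vector-element and scalar-element indexing. For the <7 x i32> store in the add7i32 test below this yields a v4i32 store, then an i64, then an i32 (movaps + movq + pextrd in the test). A standalone trace of the width sequence (plain C++; assumes the remaining width is a multiple of 32 bits):

#include <cstdio>

int main() {
  int stWidth = 224, offset = 0;   // bits left to store; offset in bits
  while (stWidth != 0) {
    // Widest assumed-legal width that does NOT exceed what is left.
    int w = stWidth >= 128 ? 128 : stWidth >= 64 ? 64 : 32;
    printf("store %3d bits at byte offset %2d\n", w, offset / 8);
    stWidth -= w;
    offset += w;
  }
  return 0;
}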
@@ -747,6 +747,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

+    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v2f64, Custom);
+    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v2i64, Custom);
+    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i8, Custom);
+    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i16, Custom);
+    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v4i32, Custom);
+
     // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
     for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
       EVT VT = (MVT::SimpleValueType)i;
@@ -3686,6 +3692,33 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   return SDValue();
 }

+SDValue
+X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+  // We support concatenating two 64-bit (MMX) operands and placing them
+  // in an XMM register.  This is better than doing a stack convert.
+  DebugLoc dl = Op.getDebugLoc();
+  EVT ResVT = Op.getValueType();
+  assert(Op.getNumOperands() == 2);
+  assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
+         ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
+  int Mask[2];
+  SDValue InVec = DAG.getNode(ISD::BIT_CONVERT,dl, MVT::v1i64, Op.getOperand(0));
+  SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
+  InVec = Op.getOperand(1);
+  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+    unsigned NumElts = ResVT.getVectorNumElements();
+    VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
+                        InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
+  } else {
+    InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec);
+    SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
+    Mask[0] = 0; Mask[1] = 2;
+    VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
+  }
+  return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
+}
+
 // v8i16 shuffles - Prefer shuffles in the following order:
 // 1. [all]   pshuflw, pshufhw, optional move
 // 2. [ssse3] 1 x pshufb
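In LowerCONCAT_VECTORS above, each 64-bit operand is first moved into lane 0 of its own v2i64 via MOVQ2DQ; the shuffle mask {0, 2} then selects lane 0 of the first operand and lane 0 of the second, since shuffle indices count across the concatenation of both inputs. A standalone model of that mask arithmetic (plain C++; the lane values are made up):

#include <cstdio>

int main() {
  long long vecOp[2]  = {0xAAAA, 0xDEAD};  // MOVQ2DQ(lhs): half in lane 0
  long long vecOp2[2] = {0xBBBB, 0xBEEF};  // MOVQ2DQ(rhs): half in lane 0
  int mask[2] = {0, 2};

  long long both[4] = {vecOp[0], vecOp[1], vecOp2[0], vecOp2[1]};
  long long res[2];
  for (int i = 0; i < 2; ++i)
    res[i] = both[mask[i]];                // res = {lhs half, rhs half}
  printf("%llx %llx\n", res[0], res[1]);   // aaaa bbbb
  return 0;
}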
@@ -7238,6 +7271,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   case ISD::ATOMIC_CMP_SWAP:    return LowerCMP_SWAP(Op,DAG);
   case ISD::ATOMIC_LOAD_SUB:    return LowerLOAD_SUB(Op,DAG);
   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
+  case ISD::CONCAT_VECTORS:     return LowerCONCAT_VECTORS(Op, DAG);
   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
@@ -156,6 +156,11 @@ namespace llvm {
       /// relative displacements.
       WrapperRIP,

+      /// MOVQ2DQ - Copies a 64-bit value from a vector to another vector.
+      /// Can be used to move a vector value from an MMX register to an XMM
+      /// register.
+      MOVQ2DQ,
+
       /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
       /// i32, corresponds to X86::PEXTRB.
       PEXTRB,
@@ -634,6 +639,7 @@ namespace llvm {
     SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
                                    SelectionDAG &DAG);
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG);
    SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
    SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG);
@@ -501,6 +501,20 @@ let Constraints = "$src1 = $dst" in {
                        (iPTR imm:$src3))))]>;
 }

+// MMX to XMM for vector types
+def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1,
+                            [SDTCisVT<0, v2i64>, SDTCisVT<1, v1i64>]>>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq VR64:$src)),
+          (v2i64 (MMX_MOVQ2DQrr VR64:$src))>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq (load_mmx addr:$src))),
+          (v2i64 (MOVQI2PQIrm addr:$src))>;
+
+def : Pat<(v2i64 (MMX_X86movq2dq (v1i64 (bitconvert
+                            (v2i32 (scalar_to_vector (loadi32 addr:$src))))))),
+          (v2i64 (MOVDI2PDIrm addr:$src))>;
+
 // Mask creation
 def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
                           "pmovmskb\t{$src, $dst|$dst, $src}",
@@ -2,10 +2,8 @@
 ; CHECK: pextrd
 ; CHECK: pextrd
 ; CHECK: movd
-; CHECK: pextrd
-; CHECK: pextrd
-; CHECK: pextrd
-; CHECK: movd
+; CHECK: movaps

 ; bitcast v14i16 to v7i32
@@ -3,7 +3,7 @@

 ; This load should be before the call, not after.

-; CHECK: movq   compl+128(%rip), %xmm0
+; CHECK: movaps compl+128(%rip), %xmm0
 ; CHECK: movaps %xmm0, (%rsp)
 ; CHECK: callq  killcommon
test/CodeGen/X86/widen_load-2.ll (new file, 155 lines)
@@ -0,0 +1,155 @@
+; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 -disable-mmx | FileCheck %s
+
+; Test based on pr5626 to load/store
+;
+
+%i32vec3 = type <3 x i32>
+define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
+; CHECK: movaps
+; CHECK: paddd
+; CHECK: pextrd
+; CHECK: movq
+  %a = load %i32vec3* %ap, align 16
+  %b = load %i32vec3* %bp, align 16
+  %x = add %i32vec3 %a, %b
+  store %i32vec3 %x, %i32vec3* %ret, align 16
+  ret void
+}
+
+define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
+; CHECK: movq
+; CHECK: pinsrd
+; CHECK: movq
+; CHECK: pinsrd
+; CHECK: paddd
+; CHECK: pextrd
+; CHECK: movq
+  %a = load %i32vec3* %ap
+  %b = load %i32vec3* %bp
+  %x = add %i32vec3 %a, %b
+  store %i32vec3 %x, %i32vec3* %ret
+  ret void
+}
+
+%i32vec7 = type <7 x i32>
+define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddd
+; CHECK: paddd
+; CHECK: pextrd
+; CHECK: movq
+; CHECK: movaps
+  %a = load %i32vec7* %ap, align 16
+  %b = load %i32vec7* %bp, align 16
+  %x = add %i32vec7 %a, %b
+  store %i32vec7 %x, %i32vec7* %ret, align 16
+  ret void
+}
+
+%i32vec12 = type <12 x i32>
+define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddd
+; CHECK: paddd
+; CHECK: paddd
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: movaps
+  %a = load %i32vec12* %ap, align 16
+  %b = load %i32vec12* %bp, align 16
+  %x = add %i32vec12 %a, %b
+  store %i32vec12 %x, %i32vec12* %ret, align 16
+  ret void
+}
+
+
+%i16vec3 = type <3 x i16>
+define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
+; CHECK: movaps
+; CHECK: paddw
+; CHECK: movd
+; CHECK: pextrw
+  %a = load %i16vec3* %ap, align 16
+  %b = load %i16vec3* %bp, align 16
+  %x = add %i16vec3 %a, %b
+  store %i16vec3 %x, %i16vec3* %ret, align 16
+  ret void
+}
+
+%i16vec4 = type <4 x i16>
+define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
+; CHECK: movaps
+; CHECK: paddw
+; CHECK: movq
+  %a = load %i16vec4* %ap, align 16
+  %b = load %i16vec4* %bp, align 16
+  %x = add %i16vec4 %a, %b
+  store %i16vec4 %x, %i16vec4* %ret, align 16
+  ret void
+}
+
+%i16vec12 = type <12 x i16>
+define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddw
+; CHECK: paddw
+; CHECK: movq
+; CHECK: movaps
+  %a = load %i16vec12* %ap, align 16
+  %b = load %i16vec12* %bp, align 16
+  %x = add %i16vec12 %a, %b
+  store %i16vec12 %x, %i16vec12* %ret, align 16
+  ret void
+}
+
+%i16vec18 = type <18 x i16>
+define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddw
+; CHECK: paddw
+; CHECK: paddw
+; CHECK: movd
+; CHECK: movaps
+; CHECK: movaps
+  %a = load %i16vec18* %ap, align 16
+  %b = load %i16vec18* %bp, align 16
+  %x = add %i16vec18 %a, %b
+  store %i16vec18 %x, %i16vec18* %ret, align 16
+  ret void
+}
+
+
+%i8vec3 = type <3 x i8>
+define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
+; CHECK: movaps
+; CHECK: paddb
+; CHECK: pextrb
+; CHECK: movb
+  %a = load %i8vec3* %ap, align 16
+  %b = load %i8vec3* %bp, align 16
+  %x = add %i8vec3 %a, %b
+  store %i8vec3 %x, %i8vec3* %ret, align 16
+  ret void
+}
+
+%i8vec31 = type <31 x i8>
+define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
+; CHECK: movaps
+; CHECK: movaps
+; CHECK: paddb
+; CHECK: paddb
+; CHECK: movq
+; CHECK: pextrb
+; CHECK: pextrw
+  %a = load %i8vec31* %ap, align 16
+  %b = load %i8vec31* %bp, align 16
+  %x = add %i8vec31 %a, %b
+  store %i8vec31 %x, %i8vec31* %ret, align 16
+  ret void
+}