mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-07 14:33:15 +00:00
Fixed a bug in the type legalizer for masked load/store intrinsics.
The problem occurs when, after vectorization, we have the type <2 x i32>. This type is promoted to <2 x i64> and then requires additional effort for expanding loads and truncating stores. I added EXPAND / TRUNCATE attributes to the masked load/store SDNodes. The generated code now contains additional shuffles. I've prepared changes to the cost estimation for masked memory operations; they will be submitted separately. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226808 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9cb8df2c75
commit
2785766bc8
@ -867,9 +867,11 @@ public:
|
||||
SDValue Offset, ISD::MemIndexedMode AM);
|
||||
|
||||
SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
|
||||
SDValue Mask, SDValue Src0, MachineMemOperand *MMO);
|
||||
SDValue Mask, SDValue Src0, EVT MemVT,
|
||||
MachineMemOperand *MMO, ISD::LoadExtType);
|
||||
SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
|
||||
SDValue Ptr, SDValue Mask, MachineMemOperand *MMO);
|
||||
SDValue Ptr, SDValue Mask, EVT MemVT,
|
||||
MachineMemOperand *MMO, bool IsTrunc);
|
||||
/// getSrcValue - Construct a node to track a Value* through the backend.
|
||||
SDValue getSrcValue(const Value *v);
|
||||
|
||||
|
@ -1970,13 +1970,17 @@ public:
|
||||
class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
|
||||
public:
|
||||
friend class SelectionDAG;
|
||||
MaskedLoadSDNode(unsigned Order, DebugLoc dl,
|
||||
SDValue *Operands, unsigned numOperands,
|
||||
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
|
||||
MaskedLoadSDNode(unsigned Order, DebugLoc dl, SDValue *Operands,
|
||||
unsigned numOperands, SDVTList VTs, ISD::LoadExtType ETy,
|
||||
EVT MemVT, MachineMemOperand *MMO)
|
||||
: MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands,
|
||||
VTs, MemVT, MMO)
|
||||
{}
|
||||
VTs, MemVT, MMO) {
|
||||
SubclassData |= (unsigned short)ETy;
|
||||
}
|
||||
|
||||
ISD::LoadExtType getExtensionType() const {
|
||||
return ISD::LoadExtType(SubclassData & 3);
|
||||
}
|
||||
const SDValue &getSrc0() const { return getOperand(3); }
|
||||
static bool classof(const SDNode *N) {
|
||||
return N->getOpcode() == ISD::MLOAD;
|
||||
@ -1989,14 +1993,19 @@ class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
|
||||
|
||||
public:
|
||||
friend class SelectionDAG;
|
||||
MaskedStoreSDNode(unsigned Order, DebugLoc dl,
|
||||
SDValue *Operands, unsigned numOperands,
|
||||
SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
|
||||
MaskedStoreSDNode(unsigned Order, DebugLoc dl, SDValue *Operands,
|
||||
unsigned numOperands, SDVTList VTs, bool isTrunc, EVT MemVT,
|
||||
MachineMemOperand *MMO)
|
||||
: MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands,
|
||||
VTs, MemVT, MMO)
|
||||
{}
|
||||
VTs, MemVT, MMO) {
|
||||
SubclassData |= (unsigned short)isTrunc;
|
||||
}
|
||||
/// isTruncatingStore - Return true if the op does a truncation before store.
|
||||
/// For integers this is the same as doing a TRUNCATE and storing the result.
|
||||
/// For floats, it is the same as doing an FP_ROUND and storing the result.
|
||||
bool isTruncatingStore() const { return SubclassData & 1; }
|
||||
|
||||
const SDValue &getData() const { return getOperand(3); }
|
||||
const SDValue &getValue() const { return getOperand(3); }
|
||||
|
||||
static bool classof(const SDNode *N) {
|
||||
return N->getOpcode() == ISD::MSTORE;
|
||||
|
@ -4853,7 +4853,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
|
||||
|
||||
MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
|
||||
SDValue Mask = MST->getMask();
|
||||
SDValue Data = MST->getData();
|
||||
SDValue Data = MST->getValue();
|
||||
SDLoc DL(N);
|
||||
|
||||
// If the MSTORE data type requires splitting and the mask is provided by a
|
||||
@ -4896,7 +4896,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
|
||||
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
|
||||
Alignment, MST->getAAInfo(), MST->getRanges());
|
||||
|
||||
Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO);
|
||||
Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
|
||||
MST->isTruncatingStore());
|
||||
|
||||
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
|
||||
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
|
||||
@ -4908,7 +4909,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
|
||||
SecondHalfAlignment, MST->getAAInfo(),
|
||||
MST->getRanges());
|
||||
|
||||
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO);
|
||||
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
|
||||
MST->isTruncatingStore());
|
||||
|
||||
AddToWorklist(Lo.getNode());
|
||||
AddToWorklist(Hi.getNode());
|
||||
@ -4969,7 +4971,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
|
||||
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
|
||||
Alignment, MLD->getAAInfo(), MLD->getRanges());
|
||||
|
||||
Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO);
|
||||
Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
|
||||
ISD::NON_EXTLOAD);
|
||||
|
||||
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
|
||||
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
|
||||
@ -4980,7 +4983,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
|
||||
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
|
||||
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
|
||||
|
||||
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO);
|
||||
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
|
||||
ISD::NON_EXTLOAD);
|
||||
|
||||
AddToWorklist(Lo.getNode());
|
||||
AddToWorklist(Hi.getNode());
|
||||
@ -9497,7 +9501,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
|
||||
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
|
||||
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
|
||||
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
|
||||
// The narrowwing should be profitable, the load/store operation should be
|
||||
// The narrowing should be profitable, the load/store operation should be
|
||||
// legal (or custom) and the store size should be equal to the NewVT width.
|
||||
while (NewBW < BitWidth &&
|
||||
(NewVT.getStoreSizeInBits() != NewBW ||
|
||||
|
@ -458,16 +458,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
|
||||
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
|
||||
SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0());
|
||||
SDValue ExtMask = PromoteTargetBoolean(N->getMask(), NVT);
|
||||
|
||||
SDValue Mask = N->getMask();
|
||||
EVT NewMaskVT = getSetCCResultType(NVT);
|
||||
if (NewMaskVT != N->getMask().getValueType())
|
||||
Mask = PromoteTargetBoolean(Mask, NewMaskVT);
|
||||
SDLoc dl(N);
|
||||
|
||||
MachineMemOperand *MMO = DAG.getMachineFunction().
|
||||
getMachineMemOperand(N->getPointerInfo(),
|
||||
MachineMemOperand::MOLoad, NVT.getStoreSize(),
|
||||
N->getAlignment(), N->getAAInfo(), N->getRanges());
|
||||
|
||||
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
|
||||
ExtMask, ExtSrc0, MMO);
|
||||
Mask, ExtSrc0, N->getMemoryVT(),
|
||||
N->getMemOperand(), ISD::SEXTLOAD);
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
@ -1117,16 +1117,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
|
||||
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
|
||||
|
||||
assert(OpNo == 2 && "Only know how to promote the mask!");
|
||||
SDValue DataOp = N->getData();
|
||||
SDValue DataOp = N->getValue();
|
||||
EVT DataVT = DataOp.getValueType();
|
||||
SDValue Mask = N->getMask();
|
||||
EVT MaskVT = Mask.getValueType();
|
||||
SDLoc dl(N);
|
||||
|
||||
bool TruncateStore = false;
|
||||
if (!TLI.isTypeLegal(DataVT)) {
|
||||
if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) {
|
||||
DataOp = GetPromotedInteger(DataOp);
|
||||
Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
|
||||
TruncateStore = true;
|
||||
}
|
||||
else {
|
||||
assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector &&
|
||||
@ -1156,10 +1158,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpN
|
||||
}
|
||||
else
|
||||
Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType());
|
||||
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
|
||||
NewOps[2] = Mask;
|
||||
NewOps[3] = DataOp;
|
||||
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
|
||||
return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
|
||||
N->getMemoryVT(), N->getMemOperand(),
|
||||
TruncateStore);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){
|
||||
|
@ -659,6 +659,7 @@ private:
|
||||
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
|
||||
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
|
||||
SDValue WidenVecOp_STORE(SDNode* N);
|
||||
SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
|
||||
SDValue WidenVecOp_SETCC(SDNode* N);
|
||||
|
||||
SDValue WidenVecOp_Convert(SDNode *N);
|
||||
|
@ -992,6 +992,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
|
||||
SDValue Ptr = MLD->getBasePtr();
|
||||
SDValue Mask = MLD->getMask();
|
||||
unsigned Alignment = MLD->getOriginalAlignment();
|
||||
ISD::LoadExtType ExtType = MLD->getExtensionType();
|
||||
|
||||
// if Alignment is equal to the vector size,
|
||||
// take the half of it for the second part
|
||||
@ -1015,7 +1016,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
|
||||
MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
|
||||
Alignment, MLD->getAAInfo(), MLD->getRanges());
|
||||
|
||||
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO);
|
||||
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
|
||||
ExtType);
|
||||
|
||||
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
|
||||
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
|
||||
@ -1026,7 +1028,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
|
||||
MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
|
||||
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
|
||||
|
||||
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO);
|
||||
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
|
||||
ExtType);
|
||||
|
||||
|
||||
// Build a factor node to remember that this load is independent of the
|
||||
@ -1464,7 +1467,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
|
||||
SDValue Ch = N->getChain();
|
||||
SDValue Ptr = N->getBasePtr();
|
||||
SDValue Mask = N->getMask();
|
||||
SDValue Data = N->getData();
|
||||
SDValue Data = N->getValue();
|
||||
EVT MemoryVT = N->getMemoryVT();
|
||||
unsigned Alignment = N->getOriginalAlignment();
|
||||
SDLoc DL(N);
|
||||
@ -1489,7 +1492,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
|
||||
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
|
||||
Alignment, N->getAAInfo(), N->getRanges());
|
||||
|
||||
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO);
|
||||
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
|
||||
N->isTruncatingStore());
|
||||
|
||||
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
|
||||
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
|
||||
@ -1500,7 +1504,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
|
||||
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
|
||||
SecondHalfAlignment, N->getAAInfo(), N->getRanges());
|
||||
|
||||
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO);
|
||||
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
|
||||
N->isTruncatingStore());
|
||||
|
||||
|
||||
// Build a factor node to remember that this store is independent of the
|
||||
@ -2412,6 +2417,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
|
||||
SDValue Mask = N->getMask();
|
||||
EVT MaskVT = Mask.getValueType();
|
||||
SDValue Src0 = GetWidenedVector(N->getSrc0());
|
||||
ISD::LoadExtType ExtType = N->getExtensionType();
|
||||
SDLoc dl(N);
|
||||
|
||||
if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
|
||||
@ -2434,14 +2440,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
|
||||
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
|
||||
}
|
||||
|
||||
// Rebuild memory operand because MemoryVT was changed
|
||||
MachineMemOperand *MMO = DAG.getMachineFunction().
|
||||
getMachineMemOperand(N->getPointerInfo(),
|
||||
MachineMemOperand::MOLoad, WidenVT.getStoreSize(),
|
||||
N->getAlignment(), N->getAAInfo(), N->getRanges());
|
||||
|
||||
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
|
||||
Mask, Src0, MMO);
|
||||
Mask, Src0, N->getMemoryVT(),
|
||||
N->getMemOperand(), ExtType);
|
||||
// Legalized the chain result - switch anything that used the old chain to
|
||||
// use the new one.
|
||||
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
|
||||
@ -2593,6 +2594,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
|
||||
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
|
||||
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
|
||||
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
|
||||
case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
|
||||
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
|
||||
|
||||
case ISD::ANY_EXTEND:
|
||||
@ -2791,6 +2793,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
|
||||
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
|
||||
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
|
||||
SDValue Mask = MST->getMask();
|
||||
EVT MaskVT = Mask.getValueType();
|
||||
SDValue StVal = MST->getValue();
|
||||
// Widen the value
|
||||
SDValue WideVal = GetWidenedVector(StVal);
|
||||
SDLoc dl(N);
|
||||
|
||||
if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
|
||||
Mask = GetWidenedVector(Mask);
|
||||
else {
|
||||
// The mask should be widened as well
|
||||
EVT BoolVT = getSetCCResultType(WideVal.getValueType());
|
||||
// We can't use ModifyToType() because we should fill the mask with
|
||||
// zeroes
|
||||
unsigned WidenNumElts = BoolVT.getVectorNumElements();
|
||||
unsigned MaskNumElts = MaskVT.getVectorNumElements();
|
||||
|
||||
unsigned NumConcat = WidenNumElts / MaskNumElts;
|
||||
SmallVector<SDValue, 16> Ops(NumConcat);
|
||||
SDValue ZeroVal = DAG.getConstant(0, MaskVT);
|
||||
Ops[0] = Mask;
|
||||
for (unsigned i = 1; i != NumConcat; ++i)
|
||||
Ops[i] = ZeroVal;
|
||||
|
||||
Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
|
||||
}
|
||||
assert(Mask.getValueType().getVectorNumElements() ==
|
||||
WideVal.getValueType().getVectorNumElements() &&
|
||||
"Mask and data vectors should have the same number of elements");
|
||||
return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
|
||||
Mask, MST->getMemoryVT(), MST->getMemOperand(),
|
||||
false);
|
||||
}
|
||||
|
||||
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
|
||||
SDValue InOp0 = GetWidenedVector(N->getOperand(0));
|
||||
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
|
||||
|
@ -4924,15 +4924,15 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
|
||||
|
||||
SDValue
|
||||
SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
|
||||
SDValue Ptr, SDValue Mask, SDValue Src0,
|
||||
MachineMemOperand *MMO) {
|
||||
SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT,
|
||||
MachineMemOperand *MMO, ISD::LoadExtType ExtTy) {
|
||||
|
||||
SDVTList VTs = getVTList(VT, MVT::Other);
|
||||
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
|
||||
FoldingSetNodeID ID;
|
||||
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
|
||||
ID.AddInteger(VT.getRawBits());
|
||||
ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
|
||||
ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
|
||||
MMO->isVolatile(),
|
||||
MMO->isNonTemporal(),
|
||||
MMO->isInvariant()));
|
||||
@ -4944,14 +4944,15 @@ SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
|
||||
}
|
||||
SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
|
||||
dl.getDebugLoc(), Ops, 4, VTs,
|
||||
VT, MMO);
|
||||
ExtTy, MemVT, MMO);
|
||||
CSEMap.InsertNode(N, IP);
|
||||
InsertNode(N);
|
||||
return SDValue(N, 0);
|
||||
}
|
||||
|
||||
SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
|
||||
SDValue Ptr, SDValue Mask, MachineMemOperand *MMO) {
|
||||
SDValue Ptr, SDValue Mask, EVT MemVT,
|
||||
MachineMemOperand *MMO, bool isTrunc) {
|
||||
assert(Chain.getValueType() == MVT::Other &&
|
||||
"Invalid chain type");
|
||||
EVT VT = Val.getValueType();
|
||||
@ -4970,7 +4971,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
|
||||
}
|
||||
SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
|
||||
dl.getDebugLoc(), Ops, 4,
|
||||
VTs, VT, MMO);
|
||||
VTs, isTrunc, MemVT, MMO);
|
||||
CSEMap.InsertNode(N, IP);
|
||||
InsertNode(N);
|
||||
return SDValue(N, 0);
|
||||
|
@ -3697,7 +3697,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
|
||||
getMachineMemOperand(MachinePointerInfo(PtrOperand),
|
||||
MachineMemOperand::MOStore, VT.getStoreSize(),
|
||||
Alignment, AAInfo);
|
||||
SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO);
|
||||
SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
|
||||
MMO, false);
|
||||
DAG.setRoot(StoreNode);
|
||||
setValue(&I, StoreNode);
|
||||
}
|
||||
@ -3736,7 +3737,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
|
||||
MachineMemOperand::MOLoad, VT.getStoreSize(),
|
||||
Alignment, AAInfo, Ranges);
|
||||
|
||||
SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO);
|
||||
SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
|
||||
ISD::NON_EXTLOAD);
|
||||
SDValue OutChain = Load.getValue(1);
|
||||
DAG.setRoot(OutChain);
|
||||
setValue(&I, Load);
|
||||
|
@ -1709,7 +1709,9 @@ void X86TargetLowering::resetOperationActions() {
|
||||
setTargetDAGCombine(ISD::FMA);
|
||||
setTargetDAGCombine(ISD::SUB);
|
||||
setTargetDAGCombine(ISD::LOAD);
|
||||
setTargetDAGCombine(ISD::MLOAD);
|
||||
setTargetDAGCombine(ISD::STORE);
|
||||
setTargetDAGCombine(ISD::MSTORE);
|
||||
setTargetDAGCombine(ISD::ZERO_EXTEND);
|
||||
setTargetDAGCombine(ISD::ANY_EXTEND);
|
||||
setTargetDAGCombine(ISD::SIGN_EXTEND);
|
||||
@ -24796,6 +24798,166 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// PerformMLOADCombine - Resolve extending loads
|
||||
static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const X86Subtarget *Subtarget) {
|
||||
MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
|
||||
if (Mld->getExtensionType() != ISD::SEXTLOAD)
|
||||
return SDValue();
|
||||
|
||||
EVT VT = Mld->getValueType(0);
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
EVT LdVT = Mld->getMemoryVT();
|
||||
SDLoc dl(Mld);
|
||||
|
||||
assert(LdVT != VT && "Cannot extend to the same type");
|
||||
unsigned ToSz = VT.getVectorElementType().getSizeInBits();
|
||||
unsigned FromSz = LdVT.getVectorElementType().getSizeInBits();
|
||||
// From, To sizes and ElemCount must be pow of two
|
||||
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
|
||||
"Unexpected size for extending masked load");
|
||||
|
||||
unsigned SizeRatio = ToSz / FromSz;
|
||||
assert(SizeRatio * NumElems * FromSz == VT.getSizeInBits());
|
||||
|
||||
// Create a type on which we perform the shuffle
|
||||
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
|
||||
LdVT.getScalarType(), NumElems*SizeRatio);
|
||||
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
|
||||
|
||||
// Convert Src0 value
|
||||
SDValue WideSrc0 = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mld->getSrc0());
|
||||
if (Mld->getSrc0().getOpcode() != ISD::UNDEF) {
|
||||
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
|
||||
for (unsigned i = 0; i != NumElems; ++i)
|
||||
ShuffleVec[i] = i * SizeRatio;
|
||||
|
||||
// Can't shuffle using an illegal type.
|
||||
assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
|
||||
WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0,
|
||||
DAG.getUNDEF(WideVecVT), &ShuffleVec[0]);
|
||||
}
|
||||
// Prepare the new mask
|
||||
SDValue NewMask;
|
||||
SDValue Mask = Mld->getMask();
|
||||
if (Mask.getValueType() == VT) {
|
||||
// Mask and original value have the same type
|
||||
NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
|
||||
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
|
||||
for (unsigned i = 0; i != NumElems; ++i)
|
||||
ShuffleVec[i] = i * SizeRatio;
|
||||
for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
|
||||
ShuffleVec[i] = NumElems*SizeRatio;
|
||||
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
|
||||
DAG.getConstant(0, WideVecVT),
|
||||
&ShuffleVec[0]);
|
||||
}
|
||||
else {
|
||||
assert(Mask.getValueType().getVectorElementType() == MVT::i1);
|
||||
unsigned WidenNumElts = NumElems*SizeRatio;
|
||||
unsigned MaskNumElts = VT.getVectorNumElements();
|
||||
EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
|
||||
WidenNumElts);
|
||||
|
||||
unsigned NumConcat = WidenNumElts / MaskNumElts;
|
||||
SmallVector<SDValue, 16> Ops(NumConcat);
|
||||
SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
|
||||
Ops[0] = Mask;
|
||||
for (unsigned i = 1; i != NumConcat; ++i)
|
||||
Ops[i] = ZeroVal;
|
||||
|
||||
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
|
||||
}
|
||||
|
||||
SDValue WideLd = DAG.getMaskedLoad(WideVecVT, dl, Mld->getChain(),
|
||||
Mld->getBasePtr(), NewMask, WideSrc0,
|
||||
Mld->getMemoryVT(), Mld->getMemOperand(),
|
||||
ISD::NON_EXTLOAD);
|
||||
SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd);
|
||||
return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true);
|
||||
|
||||
}
|
||||
/// PerformMSTORECombine - Resolve truncating stores
|
||||
static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
|
||||
if (!Mst->isTruncatingStore())
|
||||
return SDValue();
|
||||
|
||||
EVT VT = Mst->getValue().getValueType();
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
unsigned NumElems = VT.getVectorNumElements();
|
||||
EVT StVT = Mst->getMemoryVT();
|
||||
SDLoc dl(Mst);
|
||||
|
||||
assert(StVT != VT && "Cannot truncate to the same type");
|
||||
unsigned FromSz = VT.getVectorElementType().getSizeInBits();
|
||||
unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
|
||||
|
||||
// From, To sizes and ElemCount must be pow of two
|
||||
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
|
||||
"Unexpected size for truncating masked store");
|
||||
// We are going to use the original vector elt for storing.
|
||||
// Accumulated smaller vector elements must be a multiple of the store size.
|
||||
assert (((NumElems * FromSz) % ToSz) == 0 &&
|
||||
"Unexpected ratio for truncating masked store");
|
||||
|
||||
unsigned SizeRatio = FromSz / ToSz;
|
||||
assert(SizeRatio * NumElems * ToSz == VT.getSizeInBits());
|
||||
|
||||
// Create a type on which we perform the shuffle
|
||||
EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(),
|
||||
StVT.getScalarType(), NumElems*SizeRatio);
|
||||
|
||||
assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
|
||||
|
||||
SDValue WideVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mst->getValue());
|
||||
SmallVector<int, 16> ShuffleVec(NumElems * SizeRatio, -1);
|
||||
for (unsigned i = 0; i != NumElems; ++i)
|
||||
ShuffleVec[i] = i * SizeRatio;
|
||||
|
||||
// Can't shuffle using an illegal type.
|
||||
assert (TLI.isTypeLegal(WideVecVT) && "WideVecVT should be legal");
|
||||
|
||||
SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec,
|
||||
DAG.getUNDEF(WideVecVT),
|
||||
&ShuffleVec[0]);
|
||||
|
||||
SDValue NewMask;
|
||||
SDValue Mask = Mst->getMask();
|
||||
if (Mask.getValueType() == VT) {
|
||||
// Mask and original value have the same type
|
||||
NewMask = DAG.getNode(ISD::BITCAST, dl, WideVecVT, Mask);
|
||||
for (unsigned i = 0; i != NumElems; ++i)
|
||||
ShuffleVec[i] = i * SizeRatio;
|
||||
for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i)
|
||||
ShuffleVec[i] = NumElems*SizeRatio;
|
||||
NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask,
|
||||
DAG.getConstant(0, WideVecVT),
|
||||
&ShuffleVec[0]);
|
||||
}
|
||||
else {
|
||||
assert(Mask.getValueType().getVectorElementType() == MVT::i1);
|
||||
unsigned WidenNumElts = NumElems*SizeRatio;
|
||||
unsigned MaskNumElts = VT.getVectorNumElements();
|
||||
EVT NewMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
|
||||
WidenNumElts);
|
||||
|
||||
unsigned NumConcat = WidenNumElts / MaskNumElts;
|
||||
SmallVector<SDValue, 16> Ops(NumConcat);
|
||||
SDValue ZeroVal = DAG.getConstant(0, Mask.getValueType());
|
||||
Ops[0] = Mask;
|
||||
for (unsigned i = 1; i != NumConcat; ++i)
|
||||
Ops[i] = ZeroVal;
|
||||
|
||||
NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops);
|
||||
}
|
||||
|
||||
return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(),
|
||||
NewMask, StVT, Mst->getMemOperand(), false);
|
||||
}
|
||||
/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
|
||||
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
|
||||
const X86Subtarget *Subtarget) {
|
||||
@ -25894,7 +26056,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::LOAD: return PerformLOADCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::MLOAD: return PerformMLOADCombine(N, DAG, DCI, Subtarget);
|
||||
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
|
||||
case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget);
|
||||
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
|
||||
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
|
||||
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
|
||||
|
@ -159,7 +159,7 @@ define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
|
||||
}
|
||||
|
||||
; AVX2-LABEL: test15
|
||||
; AVX2: vpmaskmovq
|
||||
; AVX2: vpmaskmovd
|
||||
define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
|
||||
@ -176,8 +176,9 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %
|
||||
}
|
||||
|
||||
; AVX2-LABEL: test17
|
||||
; AVX2: vpmaskmovq
|
||||
; AVX2: vblendvpd
|
||||
; AVX2: vpmaskmovd
|
||||
; AVX2: vblendvps
|
||||
; AVX2: vpmovsxdq
|
||||
define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
|
||||
%mask = icmp eq <2 x i32> %trigger, zeroinitializer
|
||||
%res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
|
||||
|
Loading…
x
Reference in New Issue
Block a user