mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-21 00:32:23 +00:00
- Use specific nodes to match unpckl masks.
- Teach getShuffleScalarElt how to handle more target-specific nodes, so that DAGCombine can make use of it.
- Add another hack to avoid the node update problem during legalization; see the code comments for a fuller description.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112934 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ecf93e4ab3
commit
5e5342b0a8
@ -15,6 +15,7 @@
|
|||||||
#define DEBUG_TYPE "x86-isel"
|
#define DEBUG_TYPE "x86-isel"
|
||||||
#include "X86.h"
|
#include "X86.h"
|
||||||
#include "X86InstrBuilder.h"
|
#include "X86InstrBuilder.h"
|
||||||
|
#include "X86ShuffleDecode.h"
|
||||||
#include "X86ISelLowering.h"
|
#include "X86ISelLowering.h"
|
||||||
#include "X86TargetMachine.h"
|
#include "X86TargetMachine.h"
|
||||||
#include "X86TargetObjectFile.h"
|
#include "X86TargetObjectFile.h"
|
||||||
@ -2602,9 +2603,11 @@ static bool isTargetShuffle(unsigned Opcode) {
|
|||||||
case X86ISD::MOVSS:
|
case X86ISD::MOVSS:
|
||||||
case X86ISD::MOVSD:
|
case X86ISD::MOVSD:
|
||||||
case X86ISD::UNPCKLPS:
|
case X86ISD::UNPCKLPS:
|
||||||
|
case X86ISD::UNPCKLPD:
|
||||||
case X86ISD::PUNPCKLWD:
|
case X86ISD::PUNPCKLWD:
|
||||||
case X86ISD::PUNPCKLBW:
|
case X86ISD::PUNPCKLBW:
|
||||||
case X86ISD::PUNPCKLDQ:
|
case X86ISD::PUNPCKLDQ:
|
||||||
|
case X86ISD::PUNPCKLQDQ:
|
||||||
case X86ISD::UNPCKHPS:
|
case X86ISD::UNPCKHPS:
|
||||||
case X86ISD::PUNPCKHWD:
|
case X86ISD::PUNPCKHWD:
|
||||||
case X86ISD::PUNPCKHBW:
|
case X86ISD::PUNPCKHBW:
|
||||||
@ -2663,9 +2666,11 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
|
|||||||
case X86ISD::MOVSS:
|
case X86ISD::MOVSS:
|
||||||
case X86ISD::MOVSD:
|
case X86ISD::MOVSD:
|
||||||
case X86ISD::UNPCKLPS:
|
case X86ISD::UNPCKLPS:
|
||||||
|
case X86ISD::UNPCKLPD:
|
||||||
case X86ISD::PUNPCKLWD:
|
case X86ISD::PUNPCKLWD:
|
||||||
case X86ISD::PUNPCKLBW:
|
case X86ISD::PUNPCKLBW:
|
||||||
case X86ISD::PUNPCKLDQ:
|
case X86ISD::PUNPCKLDQ:
|
||||||
|
case X86ISD::PUNPCKLQDQ:
|
||||||
case X86ISD::UNPCKHPS:
|
case X86ISD::UNPCKHPS:
|
||||||
case X86ISD::PUNPCKHWD:
|
case X86ISD::PUNPCKHWD:
|
||||||
case X86ISD::PUNPCKHBW:
|
case X86ISD::PUNPCKHBW:
|
||||||
@ -3698,7 +3703,60 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG) {
|
|||||||
|
|
||||||
// Recurse into target specific vector shuffles to find scalars.
|
// Recurse into target specific vector shuffles to find scalars.
|
||||||
if (isTargetShuffle(Opcode)) {
|
if (isTargetShuffle(Opcode)) {
|
||||||
|
int NumElems = VT.getVectorNumElements();
|
||||||
|
SmallVector<unsigned, 16> ShuffleMask;
|
||||||
|
SDValue ImmN;
|
||||||
|
|
||||||
switch(Opcode) {
|
switch(Opcode) {
|
||||||
|
case X86ISD::SHUFPS:
|
||||||
|
case X86ISD::SHUFPD:
|
||||||
|
ImmN = N->getOperand(N->getNumOperands()-1);
|
||||||
|
DecodeSHUFPSMask(NumElems,
|
||||||
|
cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
||||||
|
ShuffleMask);
|
||||||
|
break;
|
||||||
|
case X86ISD::PUNPCKHBW:
|
||||||
|
case X86ISD::PUNPCKHWD:
|
||||||
|
case X86ISD::PUNPCKHDQ:
|
||||||
|
case X86ISD::PUNPCKHQDQ:
|
||||||
|
DecodePUNPCKHMask(NumElems, ShuffleMask);
|
||||||
|
break;
|
||||||
|
case X86ISD::UNPCKHPS:
|
||||||
|
case X86ISD::UNPCKHPD:
|
||||||
|
DecodeUNPCKHPMask(NumElems, ShuffleMask);
|
||||||
|
break;
|
||||||
|
case X86ISD::PUNPCKLBW:
|
||||||
|
case X86ISD::PUNPCKLWD:
|
||||||
|
case X86ISD::PUNPCKLDQ:
|
||||||
|
case X86ISD::PUNPCKLQDQ:
|
||||||
|
DecodePUNPCKLMask(NumElems, ShuffleMask);
|
||||||
|
break;
|
||||||
|
case X86ISD::UNPCKLPS:
|
||||||
|
case X86ISD::UNPCKLPD:
|
||||||
|
DecodeUNPCKLPMask(NumElems, ShuffleMask);
|
||||||
|
break;
|
||||||
|
case X86ISD::MOVHLPS:
|
||||||
|
DecodeMOVHLPSMask(NumElems, ShuffleMask);
|
||||||
|
break;
|
||||||
|
case X86ISD::MOVLHPS:
|
||||||
|
DecodeMOVLHPSMask(NumElems, ShuffleMask);
|
||||||
|
break;
|
||||||
|
case X86ISD::PSHUFD:
|
||||||
|
ImmN = N->getOperand(N->getNumOperands()-1);
|
||||||
|
DecodePSHUFMask(NumElems,
|
||||||
|
cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
||||||
|
ShuffleMask);
|
||||||
|
break;
|
||||||
|
case X86ISD::PSHUFHW:
|
||||||
|
ImmN = N->getOperand(N->getNumOperands()-1);
|
||||||
|
DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
||||||
|
ShuffleMask);
|
||||||
|
break;
|
||||||
|
case X86ISD::PSHUFLW:
|
||||||
|
ImmN = N->getOperand(N->getNumOperands()-1);
|
||||||
|
DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
||||||
|
ShuffleMask);
|
||||||
|
break;
|
||||||
case X86ISD::MOVSS:
|
case X86ISD::MOVSS:
|
||||||
case X86ISD::MOVSD: {
|
case X86ISD::MOVSD: {
|
||||||
// The index 0 always comes from the first element of the second source,
|
// The index 0 always comes from the first element of the second source,
|
||||||
@ -3711,6 +3769,13 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG) {
|
|||||||
assert("not implemented for target shuffle node");
|
assert("not implemented for target shuffle node");
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Index = ShuffleMask[Index];
|
||||||
|
if (Index < 0)
|
||||||
|
return DAG.getUNDEF(VT.getVectorElementType());
|
||||||
|
|
||||||
|
SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
|
||||||
|
return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Actual nodes that may contain scalar elements
|
// Actual nodes that may contain scalar elements
|
||||||
@ -5049,6 +5114,16 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
|
|||||||
return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
|
return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// MayFoldVectorLoad - Return true if MayFoldLoad accepts the value that
/// remains after peeling the wrappers handled below off V. V is taken by
/// value, so the caller's SDValue is not modified.
static bool MayFoldVectorLoad(SDValue V) {
|
||||||
|
// Look through a BIT_CONVERT, but only when it has a single use.
if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
|
||||||
|
V = V.getOperand(0);
|
||||||
|
// Then look through a SCALAR_TO_VECTOR, again only when it has a single use.
if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
|
||||||
|
V = V.getOperand(0);
|
||||||
|
// The final answer is delegated entirely to MayFoldLoad on what is left.
if (MayFoldLoad(V))
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
|
SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
|
||||||
bool HasSSE2) {
|
bool HasSSE2) {
|
||||||
@ -5093,15 +5168,9 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
|
|||||||
// potencial load folding here, otherwise use SHUFPS or MOVSD to match the
|
// potencial load folding here, otherwise use SHUFPS or MOVSD to match the
|
||||||
// same masks.
|
// same masks.
|
||||||
bool CanFoldLoad = false;
|
bool CanFoldLoad = false;
|
||||||
SDValue TmpV1 = V1;
|
|
||||||
SDValue TmpV2 = V2;
|
|
||||||
|
|
||||||
// Trivial case, when V2 comes from a load.
|
// Trivial case, when V2 comes from a load.
|
||||||
if (TmpV2.hasOneUse() && TmpV2.getOpcode() == ISD::BIT_CONVERT)
|
if (MayFoldVectorLoad(V2))
|
||||||
TmpV2 = TmpV2.getOperand(0);
|
|
||||||
if (TmpV2.hasOneUse() && TmpV2.getOpcode() == ISD::SCALAR_TO_VECTOR)
|
|
||||||
TmpV2 = TmpV2.getOperand(0);
|
|
||||||
if (MayFoldLoad(TmpV2))
|
|
||||||
CanFoldLoad = true;
|
CanFoldLoad = true;
|
||||||
|
|
||||||
// When V1 is a load, it can be folded later into a store in isel, example:
|
// When V1 is a load, it can be folded later into a store in isel, example:
|
||||||
@ -5109,9 +5178,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
|
|||||||
// turns into:
|
// turns into:
|
||||||
// (MOVLPSmr addr:$src1, VR128:$src2)
|
// (MOVLPSmr addr:$src1, VR128:$src2)
|
||||||
// So, recognize this potential and also use MOVLPS or MOVLPD
|
// So, recognize this potential and also use MOVLPS or MOVLPD
|
||||||
if (TmpV1.hasOneUse() && TmpV1.getOpcode() == ISD::BIT_CONVERT)
|
if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
|
||||||
TmpV1 = TmpV1.getOperand(0);
|
|
||||||
if (MayFoldLoad(TmpV1) && MayFoldIntoStore(Op))
|
|
||||||
CanFoldLoad = true;
|
CanFoldLoad = true;
|
||||||
|
|
||||||
if (CanFoldLoad) {
|
if (CanFoldLoad) {
|
||||||
@ -5142,6 +5209,20 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
|
|||||||
X86::getShuffleSHUFImmediate(SVOp), DAG);
|
X86::getShuffleSHUFImmediate(SVOp), DAG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// getUNPCKLOpcode - Map a vector type to the X86ISD "unpack low" opcode
/// listed for it in the switch below. Only the six types listed are handled;
/// any other type reaches llvm_unreachable. VT.getSimpleVT() is called
/// unconditionally on the argument.
static unsigned getUNPCKLOpcode(EVT VT) {
|
||||||
|
switch(VT.getSimpleVT().SimpleTy) {
|
||||||
|
case MVT::v4i32: return X86ISD::PUNPCKLDQ;
|
||||||
|
case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
|
||||||
|
case MVT::v4f32: return X86ISD::UNPCKLPS;
|
||||||
|
case MVT::v2f64: return X86ISD::UNPCKLPD;
|
||||||
|
case MVT::v16i8: return X86ISD::PUNPCKLBW;
|
||||||
|
case MVT::v8i16: return X86ISD::PUNPCKLWD;
|
||||||
|
default:
|
||||||
|
// NOTE(review): the message below reads "Unknow" (sic); it is left untouched
// here because it is a runtime string.
llvm_unreachable("Unknow type for unpckl");
|
||||||
|
}
|
||||||
|
// Every switch path above either returns or hits llvm_unreachable, so this
// is presumably only here to give the function a return on all paths for
// the compiler's benefit -- TODO confirm.
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
SDValue
|
SDValue
|
||||||
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
||||||
@ -5272,7 +5353,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
|
|
||||||
// FIXME: fold these into legal mask.
|
// FIXME: fold these into legal mask.
|
||||||
if (!isMMX) {
|
if (!isMMX) {
|
||||||
if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
|
if (X86::isMOVLHPSMask(SVOp) &&
|
||||||
|
(!X86::isUNPCKLMask(SVOp) || MayFoldVectorLoad(V2)))
|
||||||
return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
|
return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
|
||||||
|
|
||||||
if (X86::isMOVHLPSMask(SVOp))
|
if (X86::isMOVHLPSMask(SVOp))
|
||||||
@ -5326,8 +5408,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
return getMOVL(DAG, dl, VT, V2, V1);
|
return getMOVL(DAG, dl, VT, V2, V1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (X86::isUNPCKLMask(SVOp) ||
|
if (X86::isUNPCKLMask(SVOp))
|
||||||
X86::isUNPCKHMask(SVOp))
|
return (isMMX) ?
|
||||||
|
Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
|
||||||
|
|
||||||
|
if (X86::isUNPCKHMask(SVOp))
|
||||||
return Op;
|
return Op;
|
||||||
|
|
||||||
if (V2IsSplat) {
|
if (V2IsSplat) {
|
||||||
@ -5350,8 +5435,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
// FIXME: this seems wrong.
|
// FIXME: this seems wrong.
|
||||||
SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
|
SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
|
||||||
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
|
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
|
||||||
if (X86::isUNPCKLMask(NewSVOp) ||
|
|
||||||
X86::isUNPCKHMask(NewSVOp))
|
if (X86::isUNPCKLMask(NewSVOp))
|
||||||
|
return (isMMX) ?
|
||||||
|
Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
|
||||||
|
|
||||||
|
if (X86::isUNPCKHMask(NewSVOp))
|
||||||
return NewOp;
|
return NewOp;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -10621,7 +10710,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||||||
SelectionDAG &DAG = DCI.DAG;
|
SelectionDAG &DAG = DCI.DAG;
|
||||||
switch (N->getOpcode()) {
|
switch (N->getOpcode()) {
|
||||||
default: break;
|
default: break;
|
||||||
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
|
|
||||||
case ISD::EXTRACT_VECTOR_ELT:
|
case ISD::EXTRACT_VECTOR_ELT:
|
||||||
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
|
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
|
||||||
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
|
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
|
||||||
@ -10638,6 +10726,28 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||||||
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
|
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
|
||||||
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
|
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
|
||||||
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
|
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
|
||||||
|
case X86ISD::SHUFPS: // Handle all target specific shuffles
|
||||||
|
case X86ISD::SHUFPD:
|
||||||
|
case X86ISD::PUNPCKHBW:
|
||||||
|
case X86ISD::PUNPCKHWD:
|
||||||
|
case X86ISD::PUNPCKHDQ:
|
||||||
|
case X86ISD::PUNPCKHQDQ:
|
||||||
|
case X86ISD::UNPCKHPS:
|
||||||
|
case X86ISD::UNPCKHPD:
|
||||||
|
case X86ISD::PUNPCKLBW:
|
||||||
|
case X86ISD::PUNPCKLWD:
|
||||||
|
case X86ISD::PUNPCKLDQ:
|
||||||
|
case X86ISD::PUNPCKLQDQ:
|
||||||
|
case X86ISD::UNPCKLPS:
|
||||||
|
case X86ISD::UNPCKLPD:
|
||||||
|
case X86ISD::MOVHLPS:
|
||||||
|
case X86ISD::MOVLHPS:
|
||||||
|
case X86ISD::PSHUFD:
|
||||||
|
case X86ISD::PSHUFHW:
|
||||||
|
case X86ISD::PSHUFLW:
|
||||||
|
case X86ISD::MOVSS:
|
||||||
|
case X86ISD::MOVSD:
|
||||||
|
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
|
||||||
}
|
}
|
||||||
|
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
@ -5819,6 +5819,13 @@ def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
|
|||||||
def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
|
def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
|
||||||
(scalar_to_vector (loadf64 addr:$src2)))),
|
(scalar_to_vector (loadf64 addr:$src2)))),
|
||||||
(MOVHPDrm VR128:$src1, addr:$src2)>;
|
(MOVHPDrm VR128:$src1, addr:$src2)>;
|
||||||
|
// FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
|
||||||
|
// is during lowering, where it's not possible to recognize the load fold because
|
||||||
|
// it has two uses through a bitcast. One use disappears at isel time and the
|
||||||
|
// fold opportunity reappears.
|
||||||
|
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
|
||||||
|
(scalar_to_vector (loadf64 addr:$src2)))),
|
||||||
|
(MOVHPDrm VR128:$src1, addr:$src2)>;
|
||||||
|
|
||||||
// Shuffle with MOVSS
|
// Shuffle with MOVSS
|
||||||
def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
|
def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
|
||||||
|
Loading…
Reference in New Issue
Block a user