mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-12 07:37:34 +00:00
Merge floating point and integer UNPCK X86ISD node types.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145926 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
18851edbc4
commit
34671b812a
@ -2851,10 +2851,8 @@ static bool isTargetShuffle(unsigned Opcode) {
|
||||
case X86ISD::MOVDDUP:
|
||||
case X86ISD::MOVSS:
|
||||
case X86ISD::MOVSD:
|
||||
case X86ISD::UNPCKLP:
|
||||
case X86ISD::PUNPCKL:
|
||||
case X86ISD::UNPCKHP:
|
||||
case X86ISD::PUNPCKH:
|
||||
case X86ISD::UNPCKL:
|
||||
case X86ISD::UNPCKH:
|
||||
case X86ISD::VPERMILP:
|
||||
case X86ISD::VPERM2X128:
|
||||
return true;
|
||||
@ -2914,10 +2912,8 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
|
||||
case X86ISD::MOVLPD:
|
||||
case X86ISD::MOVSS:
|
||||
case X86ISD::MOVSD:
|
||||
case X86ISD::UNPCKLP:
|
||||
case X86ISD::PUNPCKL:
|
||||
case X86ISD::UNPCKHP:
|
||||
case X86ISD::PUNPCKH:
|
||||
case X86ISD::UNPCKL:
|
||||
case X86ISD::UNPCKH:
|
||||
return DAG.getNode(Opc, dl, VT, V1, V2);
|
||||
}
|
||||
return SDValue();
|
||||
@ -4460,12 +4456,10 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
|
||||
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
||||
ShuffleMask);
|
||||
break;
|
||||
case X86ISD::PUNPCKH:
|
||||
case X86ISD::UNPCKHP:
|
||||
case X86ISD::UNPCKH:
|
||||
DecodeUNPCKHMask(VT, ShuffleMask);
|
||||
break;
|
||||
case X86ISD::PUNPCKL:
|
||||
case X86ISD::UNPCKLP:
|
||||
case X86ISD::UNPCKL:
|
||||
DecodeUNPCKLMask(VT, ShuffleMask);
|
||||
break;
|
||||
case X86ISD::MOVHLPS:
|
||||
@ -6364,50 +6358,6 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
|
||||
X86::getShuffleSHUFImmediate(SVOp), DAG);
|
||||
}
|
||||
|
||||
/// Returns the X86ISD "unpack low" opcode to use for vector type \p VT.
///
/// \param VT       Vector type being shuffled; only its simple type is
///                 inspected.
/// \param HasAVX2  Selects the integer opcode for v8i32 and v4i64.
/// \returns X86ISD::PUNPCKL or X86ISD::UNPCKLP. Any type not listed below
///          reaches llvm_unreachable.
static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unknown type for unpckl");

  // These integer types map to the integer unpack regardless of HasAVX2.
  case MVT::v16i8: case MVT::v32i8:
  case MVT::v8i16: case MVT::v16i16:
  case MVT::v4i32:
  case MVT::v2i64:
    return X86ISD::PUNPCKL;

  // v8i32 / v4i64: integer unpack with AVX2, otherwise use the fp unit for
  // the int unpack.
  case MVT::v8i32:
  case MVT::v4i64:
    return HasAVX2 ? X86ISD::PUNPCKL : X86ISD::UNPCKLP;

  // Floating point vector types.
  case MVT::v4f32: case MVT::v8f32:
  case MVT::v2f64: case MVT::v4f64:
    return X86ISD::UNPCKLP;
  }
  // Every switch path above returns or hits llvm_unreachable; presumably kept
  // to silence missing-return warnings.
  return 0;
}
|
||||
|
||||
/// Returns the X86ISD "unpack high" opcode to use for vector type \p VT.
///
/// \param VT       Vector type being shuffled; only its simple type is
///                 inspected.
/// \param HasAVX2  Selects the integer opcode for v8i32 and v4i64.
/// \returns X86ISD::PUNPCKH or X86ISD::UNPCKHP. Any type not listed below
///          reaches llvm_unreachable.
static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unknown type for unpckh");

  // These integer types map to the integer unpack regardless of HasAVX2.
  case MVT::v16i8: case MVT::v32i8:
  case MVT::v8i16: case MVT::v16i16:
  case MVT::v4i32:
  case MVT::v2i64:
    return X86ISD::PUNPCKH;

  // v4i64 / v8i32: integer unpack with AVX2, otherwise use the fp unit for
  // the int unpack.
  case MVT::v4i64:
  case MVT::v8i32:
    return HasAVX2 ? X86ISD::PUNPCKH : X86ISD::UNPCKHP;

  // Floating point vector types.
  case MVT::v4f32: case MVT::v8f32:
  case MVT::v2f64: case MVT::v4f64:
    return X86ISD::UNPCKHP;
  }
  // Every switch path above returns or hits llvm_unreachable; presumably kept
  // to silence missing-return warnings.
  return 0;
}
|
||||
|
||||
static
|
||||
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
|
||||
const TargetLowering &TLI,
|
||||
@ -6518,11 +6468,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
|
||||
// unpckh_undef). Only use pshufd if speed is more important than size.
|
||||
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
|
||||
DAG);
|
||||
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
|
||||
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
|
||||
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
|
||||
DAG);
|
||||
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
|
||||
|
||||
if (X86::isMOVDDUPMask(SVOp) && Subtarget->hasSSE3orAVX() &&
|
||||
V2IsUndef && RelaxedMayFoldVectorLoad(V1))
|
||||
@ -6534,8 +6482,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
// Use to match splats
|
||||
if (HasXMMInt && X86::isUNPCKHMask(SVOp, HasAVX2) && V2IsUndef &&
|
||||
(VT == MVT::v2f64 || VT == MVT::v2i64))
|
||||
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
|
||||
DAG);
|
||||
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
|
||||
|
||||
if (X86::isPSHUFDMask(SVOp)) {
|
||||
// The actual implementation will match the mask in the if above and then
|
||||
@ -6635,12 +6582,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
|
||||
if (isUNPCKLMask(M, VT, HasAVX2))
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V2,
|
||||
DAG);
|
||||
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
|
||||
|
||||
if (isUNPCKHMask(M, VT, HasAVX2))
|
||||
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V2,
|
||||
DAG);
|
||||
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
|
||||
|
||||
if (V2IsSplat) {
|
||||
// Normalize mask so all entries that point to V2 point to its first
|
||||
@ -6664,12 +6609,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
|
||||
|
||||
if (X86::isUNPCKLMask(NewSVOp, HasAVX2))
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V2, V1,
|
||||
DAG);
|
||||
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V2, V1, DAG);
|
||||
|
||||
if (X86::isUNPCKHMask(NewSVOp, HasAVX2))
|
||||
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V2, V1,
|
||||
DAG);
|
||||
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V2, V1, DAG);
|
||||
}
|
||||
|
||||
// Normalize the node to match x86 shuffle ops if needed
|
||||
@ -6689,8 +6632,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
|
||||
SVOp->getSplatIndex() == 0 && V2IsUndef) {
|
||||
if (VT == MVT::v2f64 || VT == MVT::v2i64)
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
|
||||
DAG);
|
||||
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
|
||||
}
|
||||
|
||||
if (isPSHUFHWMask(M, VT))
|
||||
@ -6708,11 +6650,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
X86::getShuffleSHUFImmediate(SVOp), DAG);
|
||||
|
||||
if (isUNPCKL_v_undef_Mask(M, VT))
|
||||
return getTargetShuffleNode(getUNPCKLOpcode(VT, HasAVX2), dl, VT, V1, V1,
|
||||
DAG);
|
||||
return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
|
||||
if (isUNPCKH_v_undef_Mask(M, VT))
|
||||
return getTargetShuffleNode(getUNPCKHOpcode(VT, HasAVX2), dl, VT, V1, V1,
|
||||
DAG);
|
||||
return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Generate target specific nodes for 128 or 256-bit shuffles only
|
||||
@ -11023,10 +10963,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
|
||||
case X86ISD::MOVSD: return "X86ISD::MOVSD";
|
||||
case X86ISD::MOVSS: return "X86ISD::MOVSS";
|
||||
case X86ISD::UNPCKLP: return "X86ISD::UNPCKLP";
|
||||
case X86ISD::UNPCKHP: return "X86ISD::UNPCKHP";
|
||||
case X86ISD::PUNPCKL: return "X86ISD::PUNPCKL";
|
||||
case X86ISD::PUNPCKH: return "X86ISD::PUNPCKH";
|
||||
case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
|
||||
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
|
||||
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
|
||||
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
|
||||
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
|
||||
@ -14616,10 +14554,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case X86ISD::SHUFPS: // Handle all target specific shuffles
|
||||
case X86ISD::SHUFPD:
|
||||
case X86ISD::PALIGN:
|
||||
case X86ISD::PUNPCKH:
|
||||
case X86ISD::UNPCKHP:
|
||||
case X86ISD::PUNPCKL:
|
||||
case X86ISD::UNPCKLP:
|
||||
case X86ISD::UNPCKH:
|
||||
case X86ISD::UNPCKL:
|
||||
case X86ISD::MOVHLPS:
|
||||
case X86ISD::MOVLHPS:
|
||||
case X86ISD::PSHUFD:
|
||||
|
@ -273,10 +273,8 @@ namespace llvm {
|
||||
MOVLPD,
|
||||
MOVSD,
|
||||
MOVSS,
|
||||
UNPCKLP,
|
||||
UNPCKHP,
|
||||
PUNPCKL,
|
||||
PUNPCKH,
|
||||
UNPCKL,
|
||||
UNPCKH,
|
||||
VPERMILP,
|
||||
VPERM2X128,
|
||||
VBROADCAST,
|
||||
|
@ -130,11 +130,8 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
|
||||
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
|
||||
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
|
||||
|
||||
def X86Unpcklp : SDNode<"X86ISD::UNPCKLP", SDTShuff2Op>;
|
||||
def X86Unpckhp : SDNode<"X86ISD::UNPCKHP", SDTShuff2Op>;
|
||||
|
||||
def X86Punpckl : SDNode<"X86ISD::PUNPCKL", SDTShuff2Op>;
|
||||
def X86Punpckh : SDNode<"X86ISD::PUNPCKH", SDTShuff2Op>;
|
||||
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
|
||||
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
|
||||
|
||||
def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
|
||||
|
||||
|
@ -1157,11 +1157,11 @@ let Predicates = [HasAVX] in {
|
||||
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
|
||||
(VMOVHPSrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
// FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem
|
||||
// FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
|
||||
// is during lowering, where it's not possible to recognize the load fold because
|
||||
// it has two uses through a bitcast. One use disappears at isel time and the
|
||||
// fold opportunity reappears.
|
||||
def : Pat<(v2f64 (X86Unpcklp VR128:$src1,
|
||||
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
|
||||
(scalar_to_vector (loadf64 addr:$src2)))),
|
||||
(VMOVHPDrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
@ -1172,10 +1172,10 @@ let Predicates = [HasAVX] in {
|
||||
|
||||
// Store patterns
|
||||
def : Pat<(store (f64 (vector_extract
|
||||
(v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
|
||||
(v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
|
||||
(VMOVHPSmr addr:$dst, VR128:$src)>;
|
||||
def : Pat<(store (f64 (vector_extract
|
||||
(v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
|
||||
(v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
|
||||
(VMOVHPDmr addr:$dst, VR128:$src)>;
|
||||
}
|
||||
|
||||
@ -1195,16 +1195,16 @@ let Predicates = [HasSSE1] in {
|
||||
|
||||
// Store patterns
|
||||
def : Pat<(store (f64 (vector_extract
|
||||
(v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst),
|
||||
(v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst),
|
||||
(MOVHPSmr addr:$dst, VR128:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasSSE2] in {
|
||||
// FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem
|
||||
// FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
|
||||
// is during lowering, where it's not possible to recognize the load fold because
|
||||
// it has two uses through a bitcast. One use disappears at isel time and the
|
||||
// fold opportunity reappears.
|
||||
def : Pat<(v2f64 (X86Unpcklp VR128:$src1,
|
||||
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
|
||||
(scalar_to_vector (loadf64 addr:$src2)))),
|
||||
(MOVHPDrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
@ -1215,7 +1215,7 @@ let Predicates = [HasSSE2] in {
|
||||
|
||||
// Store patterns
|
||||
def : Pat<(store (f64 (vector_extract
|
||||
(v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))),addr:$dst),
|
||||
(v2f64 (X86Unpckh VR128:$src, (undef))), (iPTR 0))),addr:$dst),
|
||||
(MOVHPDmr addr:$dst, VR128:$src)>;
|
||||
}
|
||||
|
||||
@ -2431,27 +2431,27 @@ let AddedComplexity = 10 in {
|
||||
} // AddedComplexity
|
||||
|
||||
let Predicates = [HasSSE1] in {
|
||||
def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))),
|
||||
def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
|
||||
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)),
|
||||
def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
|
||||
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))),
|
||||
def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
|
||||
(UNPCKHPSrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)),
|
||||
def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
|
||||
(UNPCKHPSrr VR128:$src1, VR128:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasSSE2] in {
|
||||
def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
(UNPCKLPDrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)),
|
||||
def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
|
||||
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
(UNPCKHPDrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)),
|
||||
def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
|
||||
(UNPCKHPDrr VR128:$src1, VR128:$src2)>;
|
||||
|
||||
// FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the
|
||||
// FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
|
||||
// problem is during lowering, where it's not possible to recognize the load
|
||||
// fold because it has two uses through a bitcast. One use disappears at isel
|
||||
// time and the fold opportunity reappears.
|
||||
@ -2464,59 +2464,43 @@ let Predicates = [HasSSE2] in {
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))),
|
||||
def : Pat<(v4f32 (X86Unpckl VR128:$src1, (memopv4f32 addr:$src2))),
|
||||
(VUNPCKLPSrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)),
|
||||
def : Pat<(v4f32 (X86Unpckl VR128:$src1, VR128:$src2)),
|
||||
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))),
|
||||
def : Pat<(v4f32 (X86Unpckh VR128:$src1, (memopv4f32 addr:$src2))),
|
||||
(VUNPCKHPSrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)),
|
||||
def : Pat<(v4f32 (X86Unpckh VR128:$src1, VR128:$src2)),
|
||||
(VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
|
||||
|
||||
def : Pat<(v8f32 (X86Unpcklp VR256:$src1, (memopv8f32 addr:$src2))),
|
||||
def : Pat<(v8f32 (X86Unpckl VR256:$src1, (memopv8f32 addr:$src2))),
|
||||
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8f32 (X86Unpcklp VR256:$src1, VR256:$src2)),
|
||||
def : Pat<(v8f32 (X86Unpckl VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpcklp VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpcklp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
||||
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8f32 (X86Unpckhp VR256:$src1, (memopv8f32 addr:$src2))),
|
||||
def : Pat<(v8f32 (X86Unpckh VR256:$src1, (memopv8f32 addr:$src2))),
|
||||
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8f32 (X86Unpckhp VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpckhp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
||||
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpckhp VR256:$src1, VR256:$src2)),
|
||||
def : Pat<(v8f32 (X86Unpckh VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
|
||||
|
||||
def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
def : Pat<(v2f64 (X86Unpckl VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
(VUNPCKLPDrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)),
|
||||
def : Pat<(v2f64 (X86Unpckl VR128:$src1, VR128:$src2)),
|
||||
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
def : Pat<(v2f64 (X86Unpckh VR128:$src1, (memopv2f64 addr:$src2))),
|
||||
(VUNPCKHPDrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)),
|
||||
def : Pat<(v2f64 (X86Unpckh VR128:$src1, VR128:$src2)),
|
||||
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
|
||||
|
||||
def : Pat<(v4f64 (X86Unpcklp VR256:$src1, (memopv4f64 addr:$src2))),
|
||||
def : Pat<(v4f64 (X86Unpckl VR256:$src1, (memopv4f64 addr:$src2))),
|
||||
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4f64 (X86Unpcklp VR256:$src1, VR256:$src2)),
|
||||
def : Pat<(v4f64 (X86Unpckl VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpcklp VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpcklp VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v4f64 (X86Unpckhp VR256:$src1, (memopv4f64 addr:$src2))),
|
||||
def : Pat<(v4f64 (X86Unpckh VR256:$src1, (memopv4f64 addr:$src2))),
|
||||
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4f64 (X86Unpckhp VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpckhp VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpckhp VR256:$src1, VR256:$src2)),
|
||||
def : Pat<(v4f64 (X86Unpckh VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
|
||||
|
||||
// FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the
|
||||
// FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
|
||||
// problem is during lowering, where it's not possible to recognize the load
|
||||
// fold because it has two uses through a bitcast. One use disappears at isel
|
||||
// time and the fold opportunity reappears.
|
||||
@ -4199,66 +4183,88 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpckl,
|
||||
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
|
||||
bc_v16i8, 0>, VEX_4V;
|
||||
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpckl,
|
||||
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
|
||||
bc_v8i16, 0>, VEX_4V;
|
||||
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckl,
|
||||
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
|
||||
bc_v4i32, 0>, VEX_4V;
|
||||
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpckl,
|
||||
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
|
||||
bc_v2i64, 0>, VEX_4V;
|
||||
|
||||
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckh,
|
||||
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
|
||||
bc_v16i8, 0>, VEX_4V;
|
||||
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckh,
|
||||
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
|
||||
bc_v8i16, 0>, VEX_4V;
|
||||
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckh,
|
||||
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
|
||||
bc_v4i32, 0>, VEX_4V;
|
||||
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckh,
|
||||
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
|
||||
bc_v2i64, 0>, VEX_4V;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpckl,
|
||||
defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
|
||||
bc_v32i8>, VEX_4V;
|
||||
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpckl,
|
||||
defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
|
||||
bc_v16i16>, VEX_4V;
|
||||
defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckl,
|
||||
defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
|
||||
bc_v8i32>, VEX_4V;
|
||||
defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpckl,
|
||||
defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
|
||||
bc_v4i64>, VEX_4V;
|
||||
|
||||
defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckh,
|
||||
defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
|
||||
bc_v32i8>, VEX_4V;
|
||||
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckh,
|
||||
defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
|
||||
bc_v16i16>, VEX_4V;
|
||||
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckh,
|
||||
defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
|
||||
bc_v8i32>, VEX_4V;
|
||||
defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckh,
|
||||
defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
|
||||
bc_v4i64>, VEX_4V;
|
||||
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpckl,
|
||||
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
|
||||
bc_v16i8>;
|
||||
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpckl,
|
||||
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
|
||||
bc_v8i16>;
|
||||
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckl,
|
||||
defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
|
||||
bc_v4i32>;
|
||||
defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpckl,
|
||||
defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
|
||||
bc_v2i64>;
|
||||
|
||||
defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckh,
|
||||
defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
|
||||
bc_v16i8>;
|
||||
defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckh,
|
||||
defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
|
||||
bc_v8i16>;
|
||||
defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckh,
|
||||
defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
|
||||
bc_v4i32>;
|
||||
defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckh,
|
||||
defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
|
||||
bc_v2i64>;
|
||||
}
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Patterns for using AVX1 instructions with integer vectors
|
||||
// Here to give AVX2 priority
|
||||
// Selection patterns, guarded by the HasAVX predicate, that map X86Unpckl /
// X86Unpckh nodes of the 256-bit integer types v8i32 and v4i64 onto the
// VUNPCK{L,H}PS / VUNPCK{L,H}PD "Y" instructions. Each node/type pair has a
// memory-operand form (rm) and a register-register form (rr).
let Predicates = [HasAVX] in {
|
||||
// v8i32 unpack low -> VUNPCKLPSY. In the rm form the second operand is a
// memopv4i64 load from addr:$src2 wrapped in bc_v8i32.
def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
||||
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
|
||||
// v8i32 unpack high -> VUNPCKHPSY (rm form, then rr form).
def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
||||
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
|
||||
|
||||
// v4i64 unpack low -> VUNPCKLPDY. The rm form uses the memopv4i64 load
// directly.
def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
|
||||
// v4i64 unpack high -> VUNPCKHPDY (rm form, then rr form).
def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
|
||||
}
|
||||
|
||||
// Splat v2f64 / v2i64
|
||||
let AddedComplexity = 10 in {
|
||||
def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
|
||||
|
Loading…
x
Reference in New Issue
Block a user