diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 96c6f410719..6ebba0e4566 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2843,18 +2843,10 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVDDUP: case X86ISD::MOVSS: case X86ISD::MOVSD: - case X86ISD::UNPCKLPS: - case X86ISD::UNPCKLPD: - case X86ISD::PUNPCKLWD: - case X86ISD::PUNPCKLBW: - case X86ISD::PUNPCKLDQ: - case X86ISD::PUNPCKLQDQ: - case X86ISD::UNPCKHPS: - case X86ISD::UNPCKHPD: - case X86ISD::PUNPCKHWD: - case X86ISD::PUNPCKHBW: - case X86ISD::PUNPCKHDQ: - case X86ISD::PUNPCKHQDQ: + case X86ISD::UNPCKLP: + case X86ISD::PUNPCKL: + case X86ISD::UNPCKHP: + case X86ISD::PUNPCKH: case X86ISD::VPERMILPS: case X86ISD::VPERMILPSY: case X86ISD::VPERMILPD: @@ -2920,18 +2912,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVLPD: case X86ISD::MOVSS: case X86ISD::MOVSD: - case X86ISD::UNPCKLPS: - case X86ISD::UNPCKLPD: - case X86ISD::PUNPCKLWD: - case X86ISD::PUNPCKLBW: - case X86ISD::PUNPCKLDQ: - case X86ISD::PUNPCKLQDQ: - case X86ISD::UNPCKHPS: - case X86ISD::UNPCKHPD: - case X86ISD::PUNPCKHWD: - case X86ISD::PUNPCKHBW: - case X86ISD::PUNPCKHDQ: - case X86ISD::PUNPCKHQDQ: + case X86ISD::UNPCKLP: + case X86ISD::PUNPCKL: + case X86ISD::UNPCKHP: + case X86ISD::PUNPCKH: return DAG.getNode(Opc, dl, VT, V1, V2); } return SDValue(); @@ -4635,24 +4619,16 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, cast(ImmN)->getZExtValue(), ShuffleMask); break; - case X86ISD::PUNPCKHBW: - case X86ISD::PUNPCKHWD: - case X86ISD::PUNPCKHDQ: - case X86ISD::PUNPCKHQDQ: + case X86ISD::PUNPCKH: DecodePUNPCKHMask(NumElems, ShuffleMask); break; - case X86ISD::UNPCKHPS: - case X86ISD::UNPCKHPD: + case X86ISD::UNPCKHP: DecodeUNPCKHPMask(VT, ShuffleMask); break; - case X86ISD::PUNPCKLBW: - case X86ISD::PUNPCKLWD: - case X86ISD::PUNPCKLDQ: - case X86ISD::PUNPCKLQDQ: + case X86ISD::PUNPCKL: DecodePUNPCKLMask(VT, ShuffleMask); break; - case X86ISD::UNPCKLPS: - case X86ISD::UNPCKLPD: + case X86ISD::UNPCKLP: DecodeUNPCKLPMask(VT, ShuffleMask); break; case X86ISD::MOVHLPS: @@ -6568,22 +6544,20 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { - case MVT::v4i32: return X86ISD::PUNPCKLDQ; - case MVT::v2i64: return X86ISD::PUNPCKLQDQ; + case MVT::v32i8: + case MVT::v16i8: + case MVT::v16i16: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: return X86ISD::PUNPCKL; case MVT::v8i32: - if (HasAVX2) return X86ISD::PUNPCKLDQ; + case MVT::v4i64: + if (HasAVX2) return X86ISD::PUNPCKL; // else use fp unit for int unpack. case MVT::v8f32: - case MVT::v4f32: return X86ISD::UNPCKLPS; - case MVT::v4i64: - if (HasAVX2) return X86ISD::PUNPCKLQDQ; - // else use fp unit for int unpack. + case MVT::v4f32: case MVT::v4f64: - case MVT::v2f64: return X86ISD::UNPCKLPD; - case MVT::v32i8: - case MVT::v16i8: return X86ISD::PUNPCKLBW; - case MVT::v16i16: - case MVT::v8i16: return X86ISD::PUNPCKLWD; + case MVT::v2f64: return X86ISD::UNPCKLP; default: llvm_unreachable("Unknown type for unpckl"); } @@ -6592,22 +6566,20 @@ static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) { static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { - case MVT::v4i32: return X86ISD::PUNPCKHDQ; - case MVT::v2i64: return X86ISD::PUNPCKHQDQ; + case MVT::v32i8: + case MVT::v16i8: + case MVT::v16i16: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v2i64: return X86ISD::PUNPCKH; + case MVT::v4i64: case MVT::v8i32: - if (HasAVX2) return X86ISD::PUNPCKHDQ; + if (HasAVX2) return X86ISD::PUNPCKH; // else use fp unit for int unpack. case MVT::v8f32: - case MVT::v4f32: return X86ISD::UNPCKHPS; - case MVT::v4i64: - if (HasAVX2) return X86ISD::PUNPCKHQDQ; - // else use fp unit for int unpack. + case MVT::v4f32: case MVT::v4f64: - case MVT::v2f64: return X86ISD::UNPCKHPD; - case MVT::v32i8: - case MVT::v16i8: return X86ISD::PUNPCKHBW; - case MVT::v16i16: - case MVT::v8i16: return X86ISD::PUNPCKHWD; + case MVT::v2f64: return X86ISD::UNPCKHP; default: llvm_unreachable("Unknown type for unpckh"); } @@ -6910,9 +6882,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { if (VT == MVT::v2f64) - return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG); + return getTargetShuffleNode(X86ISD::UNPCKLP, dl, VT, V1, V1, DAG); if (VT == MVT::v2i64) - return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG); + return getTargetShuffleNode(X86ISD::PUNPCKL, dl, VT, V1, V1, DAG); } if (isPSHUFHWMask(M, VT)) @@ -11266,18 +11238,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD"; case X86ISD::MOVSD: return "X86ISD::MOVSD"; case X86ISD::MOVSS: return "X86ISD::MOVSS"; - case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; - case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; - case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; - case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; - case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; - case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD"; - case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ"; - case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ"; - case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW"; - case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD"; - case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ"; - case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ"; + case X86ISD::UNPCKLP: return "X86ISD::UNPCKLP"; + case X86ISD::UNPCKHP: return "X86ISD::UNPCKHP"; + case X86ISD::PUNPCKL: return "X86ISD::PUNPCKL"; + case X86ISD::PUNPCKH: return "X86ISD::PUNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS"; case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY"; @@ -14857,18 +14821,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::SHUFPS: // Handle all target specific shuffles case X86ISD::SHUFPD: case X86ISD::PALIGN: - case X86ISD::PUNPCKHBW: - case X86ISD::PUNPCKHWD: - case X86ISD::PUNPCKHDQ: - case X86ISD::PUNPCKHQDQ: - case X86ISD::UNPCKHPS: - case X86ISD::UNPCKHPD: - case X86ISD::PUNPCKLBW: - case X86ISD::PUNPCKLWD: - case X86ISD::PUNPCKLDQ: - case X86ISD::PUNPCKLQDQ: - case X86ISD::UNPCKLPS: - case X86ISD::UNPCKLPD: + case X86ISD::PUNPCKH: + case X86ISD::UNPCKHP: + case X86ISD::PUNPCKL: + case X86ISD::UNPCKLP: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFD: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index ccff3a5ea69..582b6b522c8 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -273,18 +273,10 @@ namespace llvm { MOVLPD, MOVSD, MOVSS, - UNPCKLPS, - UNPCKLPD, - UNPCKHPS, - UNPCKHPD, - PUNPCKLBW, - PUNPCKLWD, - PUNPCKLDQ, - PUNPCKLQDQ, - PUNPCKHBW, - PUNPCKHWD, - PUNPCKHDQ, - PUNPCKHQDQ, + UNPCKLP, + UNPCKHP, + PUNPCKL, + PUNPCKH, VPERMILPS, VPERMILPSY, VPERMILPD, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 791bbe6566c..32392dd5529 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -130,21 +130,11 @@ def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; -def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; -def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; +def X86Unpcklp : SDNode<"X86ISD::UNPCKLP", SDTShuff2Op>; +def X86Unpckhp : SDNode<"X86ISD::UNPCKHP", SDTShuff2Op>; -def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; -def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; - -def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; -def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; -def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>; -def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>; - -def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>; -def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; -def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; -def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; +def X86Punpckl : SDNode<"X86ISD::PUNPCKL", SDTShuff2Op>; +def X86Punpckh : SDNode<"X86ISD::PUNPCKH", SDTShuff2Op>; def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>; def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 7cadac16d7d..87df492121b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1159,11 +1159,11 @@ let Predicates = [HasAVX] in { (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), (VMOVHPSrm VR128:$src1, addr:$src2)>; - // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem + // FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem // is during lowering, where it's not possible to recognize the load fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, + def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))), (VMOVHPDrm VR128:$src1, addr:$src2)>; @@ -1174,10 +1174,10 @@ let Predicates = [HasAVX] in { // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst), (VMOVHPSmr addr:$dst, VR128:$src)>; def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst), (VMOVHPDmr addr:$dst, VR128:$src)>; } @@ -1194,16 +1194,16 @@ let Predicates = [HasSSE1] in { // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst), + (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))), addr:$dst), (MOVHPSmr addr:$dst, VR128:$src)>; } let Predicates = [HasSSE2] in { - // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem + // FIXME: Instead of X86Unpcklp, there should be a X86Movlhpd here, the problem // is during lowering, where it's not possible to recognize the load fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, + def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))), (MOVHPDrm VR128:$src1, addr:$src2)>; @@ -1214,7 +1214,7 @@ let Predicates = [HasSSE2] in { // Store patterns def : Pat<(store (f64 (vector_extract - (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst), + (v2f64 (X86Unpckhp VR128:$src, (undef))), (iPTR 0))),addr:$dst), (MOVHPDmr addr:$dst, VR128:$src)>; } @@ -2430,27 +2430,27 @@ let AddedComplexity = 10 in { } // AddedComplexity let Predicates = [HasSSE1] in { - def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))), (UNPCKLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)), (UNPCKLPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))), (UNPCKHPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)), (UNPCKHPSrr VR128:$src1, VR128:$src2)>; } let Predicates = [HasSSE2] in { - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))), (UNPCKLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)), (UNPCKLPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))), (UNPCKHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)), (UNPCKHPDrr VR128:$src1, VR128:$src2)>; - // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the + // FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. One use disappears at isel // time and the fold opportunity reappears. @@ -2463,59 +2463,59 @@ let Predicates = [HasSSE2] in { } let Predicates = [HasAVX] in { - def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpcklp VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKLPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpcklp VR128:$src1, VR128:$src2)), (VUNPCKLPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))), + def : Pat<(v4f32 (X86Unpckhp VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKHPSrm VR128:$src1, addr:$src2)>; - def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), + def : Pat<(v4f32 (X86Unpckhp VR128:$src1, VR128:$src2)), (VUNPCKHPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))), + def : Pat<(v8f32 (X86Unpcklp VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpcklp VR256:$src1, VR256:$src2)), (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)), + def : Pat<(v8i32 (X86Unpcklp VR256:$src1, VR256:$src2)), (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + def : Pat<(v8i32 (X86Unpcklp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))), + def : Pat<(v8f32 (X86Unpckhp VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpckhp VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), + def : Pat<(v8i32 (X86Unpckhp VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)), + def : Pat<(v8i32 (X86Unpckhp VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpcklp VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKLPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpcklp VR128:$src1, VR128:$src2)), (VUNPCKLPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))), + def : Pat<(v2f64 (X86Unpckhp VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), + def : Pat<(v2f64 (X86Unpckhp VR128:$src1, VR128:$src2)), (VUNPCKHPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))), + def : Pat<(v4f64 (X86Unpcklp VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpcklp VR256:$src1, VR256:$src2)), (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))), + def : Pat<(v4i64 (X86Unpcklp VR256:$src1, (memopv4i64 addr:$src2))), (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)), + def : Pat<(v4i64 (X86Unpcklp VR256:$src1, VR256:$src2)), (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))), + def : Pat<(v4f64 (X86Unpckhp VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpckhp VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))), + def : Pat<(v4i64 (X86Unpckhp VR256:$src1, (memopv4i64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, VR256:$src2)), + def : Pat<(v4i64 (X86Unpckhp VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; - // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the + // FIXME: Instead of X86Movddup, there should be a X86Unpcklp here, the // problem is during lowering, where it's not possible to recognize the load // fold cause it has two uses through a bitcast. One use disappears at isel // time and the fold opportunity reappears. @@ -4198,62 +4198,62 @@ multiclass sse2_unpack_y opc, string OpcodeStr, ValueType vt, } let Predicates = [HasAVX] in { - defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw, + defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpckl, bc_v16i8, 0>, VEX_4V; - defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd, + defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpckl, bc_v8i16, 0>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq, + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckl, bc_v4i32, 0>, VEX_4V; - defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpcklqdq, + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Punpckl, bc_v2i64, 0>, VEX_4V; - defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw, + defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckh, bc_v16i8, 0>, VEX_4V; - defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd, + defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckh, bc_v8i16, 0>, VEX_4V; - defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq, + defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckh, bc_v4i32, 0>, VEX_4V; - defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckhqdq, + defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Punpckh, bc_v2i64, 0>, VEX_4V; } let Predicates = [HasAVX2] in { - defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw, + defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpckl, bc_v32i8>, VEX_4V; - defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd, + defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpckl, bc_v16i16>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq, + defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckl, bc_v8i32>, VEX_4V; - defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpcklqdq, + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Punpckl, bc_v4i64>, VEX_4V; - defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw, + defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckh, bc_v32i8>, VEX_4V; - defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd, + defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckh, bc_v16i16>, VEX_4V; - defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq, + defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckh, bc_v8i32>, VEX_4V; - defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckhqdq, + defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Punpckh, bc_v4i64>, VEX_4V; } let Constraints = "$src1 = $dst" in { - defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, + defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpckl, bc_v16i8>; - defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, + defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpckl, bc_v8i16>; - defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, + defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckl, bc_v4i32>; - defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpcklqdq, + defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Punpckl, bc_v2i64>; - defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, + defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckh, bc_v16i8>; - defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, + defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckh, bc_v8i16>; - defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, + defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckh, bc_v4i32>; - defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckhqdq, + defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Punpckh, bc_v2i64>; } } // ExeDomain = SSEPackedInt