diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ae955fad11e..41316dd9a1b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -303,14 +303,18 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM) setOperationAction(ISD::LOAD, MVT::v8i16, Legal); setOperationAction(ISD::LOAD, MVT::v4i32, Legal); setOperationAction(ISD::LOAD, MVT::v2i64, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); } computeRegisterProperties(); @@ -1499,6 +1503,29 @@ bool X86::isUNPCKHPDMask(SDNode *N) { cast<ConstantSDNode>(Bit1)->getValue() == 3); } +/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to UNPCKL. 
+bool X86::isUNPCKLMask(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR); + + unsigned NumElems = N->getNumOperands(); + if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) + return false; + + for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) { + SDOperand BitI = N->getOperand(i); + SDOperand BitI1 = N->getOperand(i+1); + assert(isa<ConstantSDNode>(BitI) && isa<ConstantSDNode>(BitI1) && + "Invalid VECTOR_SHUFFLE mask!"); + if (cast<ConstantSDNode>(BitI)->getValue() != j) + return false; + if (cast<ConstantSDNode>(BitI1)->getValue() != j + NumElems) + return false; + } + + return true; +} + /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies /// a splat of a single element. bool X86::isSplatMask(SDNode *N) { @@ -2321,6 +2348,9 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { MVT::ValueType VT = Op.getValueType(); unsigned NumElems = PermMask.getNumOperands(); + // All v2f64 cases are handled. + if (NumElems == 2) return SDOperand(); + // Handle splat cases. if (X86::isSplatMask(PermMask.Val)) { if (V2.getOpcode() == ISD::UNDEF) @@ -2332,8 +2362,8 @@ return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, DAG.getNode(ISD::UNDEF, V1.getValueType()), PermMask); - } else if (NumElems == 2) { - // All v2f64 cases are handled. + } else if (X86::isUNPCKLMask(PermMask.Val)) { + // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*. 
return SDOperand(); } else if (X86::isPSHUFDMask(PermMask.Val)) { if (V2.getOpcode() == ISD::UNDEF) @@ -2404,13 +2434,22 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { // : unpcklps 1, 3 ==> Y: // Step 2: unpcklps X, Y ==> <3, 2, 1, 0> MVT::ValueType VT = Op.getValueType(); + MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems); + MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT); + std::vector<SDOperand> MaskVec; + for (unsigned i = 0, e = NumElems/2; i != e; ++i) { + MaskVec.push_back(DAG.getConstant(i, BaseVT)); + MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT)); + } + SDOperand PermMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec); std::vector<SDOperand> V(NumElems); for (unsigned i = 0; i < NumElems; ++i) V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i)); NumElems >>= 1; while (NumElems != 0) { for (unsigned i = 0; i < NumElems; ++i) - V[i] = DAG.getNode(X86ISD::UNPCKL, VT, V[i], V[i + NumElems]); + V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems], + PermMask); NumElems >>= 1; } return V[0]; @@ -2453,7 +2492,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::Wrapper: return "X86ISD::Wrapper"; case X86ISD::S2VEC: return "X86ISD::S2VEC"; case X86ISD::ZEXT_S2VEC: return "X86ISD::ZEXT_S2VEC"; - case X86ISD::UNPCKL: return "X86ISD::UNPCKL"; } } @@ -2543,5 +2581,6 @@ X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const { return (Mask.Val->getNumOperands() == 2 || X86::isSplatMask(Mask.Val) || X86::isPSHUFDMask(Mask.Val) || - X86::isSHUFPMask(Mask.Val)); + X86::isSHUFPMask(Mask.Val) || + X86::isUNPCKLMask(Mask.Val)); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 4ab336deeee..132c2ac81ce 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -153,10 +153,6 @@ namespace llvm { /// ZEXT_S2VEC - SCALAR_TO_VECTOR with zero extension. 
The destination base /// does not have to match the operand type. ZEXT_S2VEC, - - /// UNPCKL - Unpack and interleave low. This corresponds to X86::UNPCKLPS, - /// X86::PUNPCKL*. - UNPCKL, }; // X86 specific condition code. These correspond to X86_*_COND in @@ -205,6 +201,10 @@ namespace llvm { /// specifies a shuffle of elements that is suitable for input to UNPCKHPD. bool isUNPCKHPDMask(SDNode *N); + /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a shuffle of elements that is suitable for input to UNPCKL. + bool isUNPCKLMask(SDNode *N); + /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element. bool isSplatMask(SDNode *N); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 4daa7ca8cb1..a94fe892c05 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -30,8 +30,6 @@ def X86zexts2vec : SDNode<"X86ISD::ZEXT_S2VEC", def SDTUnpckl : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; -def X86unpckl : SDNode<"X86ISD::UNPCKL", SDTUnpckl, - []>; //===----------------------------------------------------------------------===// // SSE pattern fragments @@ -77,6 +75,10 @@ def UNPCKHPD_shuffle_mask : PatLeaf<(build_vector), [{ return X86::isUNPCKHPDMask(N); }], SHUFFLE_get_shuf_imm>; +def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{ + return X86::isUNPCKLMask(N); +}]>; + // Only use PSHUF if it is not a splat. 
def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{ return !X86::isSplatMask(N) && X86::isPSHUFDMask(N); @@ -756,14 +758,17 @@ def PSHUFDrm : PDIi8<0x70, MRMSrcMem, let isTwoAddress = 1 in { def SHUFPSrr : PSIi8<0xC6, MRMSrcReg, - (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3), + (ops VR128:$dst, VR128:$src1, VR128:$src2, i32i8imm:$src3), "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR128:$dst, (vector_shuffle (v4f32 VR128:$src1), (v4f32 VR128:$src2), SHUFP_shuffle_mask:$src3))]>; def SHUFPSrm : PSIi8<0xC6, MRMSrcMem, - (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3), - "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>; + (ops VR128:$dst, VR128:$src1, f128mem:$src2, i32i8imm:$src3), + "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, (vector_shuffle + (v4f32 VR128:$src1), (load addr:$src2), + SHUFP_shuffle_mask:$src3))]>; def SHUFPDrr : PDIi8<0xC6, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2, i8imm:$src3), "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", @@ -772,7 +777,10 @@ def SHUFPDrr : PDIi8<0xC6, MRMSrcReg, SHUFP_shuffle_mask:$src3))]>; def SHUFPDrm : PDIi8<0xC6, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2, i8imm:$src3), - "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>; + "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", + [(set VR128:$dst, (vector_shuffle + (v2f64 VR128:$src1), (load addr:$src2), + SHUFP_shuffle_mask:$src3))]>; def UNPCKHPSrr : PSI<0x15, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), @@ -789,13 +797,15 @@ def UNPCKHPDrm : PDI<0x15, MRMSrcMem, def UNPCKLPSrr : PSI<0x14, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "unpcklps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1, - VR128:$src2)))]>; + [(set VR128:$dst, + (v4f32 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def UNPCKLPSrm : PSI<0x14, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), "unpcklps {$src2, $dst|$dst, $src2}", - 
[(set VR128:$dst, (v4f32 (X86unpckl VR128:$src1, - (load addr:$src2))))]>; + [(set VR128:$dst, + (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2), + UNPCKL_shuffle_mask)))]>; def UNPCKLPDrr : PDI<0x14, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "unpcklpd {$src2, $dst|$dst, $src2}", []>; @@ -895,33 +905,39 @@ def PSUBDrm : PDI<0xFA, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "punpcklbw {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1, - VR128:$src2)))]>; + [(set VR128:$dst, + (v16i8 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2), "punpcklbw {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v16i8 (X86unpckl VR128:$src1, - (load addr:$src2))))]>; + [(set VR128:$dst, + (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2), + UNPCKL_shuffle_mask)))]>; def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "punpcklwd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1, - VR128:$src2)))]>; + [(set VR128:$dst, + (v8i16 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2), "punpcklwd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v8i16 (X86unpckl VR128:$src1, - (load addr:$src2))))]>; + [(set VR128:$dst, + (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2), + UNPCKL_shuffle_mask)))]>; def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "punpckldq {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1, - VR128:$src2)))]>; + [(set VR128:$dst, + (v4i32 (vector_shuffle VR128:$src1, VR128:$src2, + UNPCKL_shuffle_mask)))]>; def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, (ops VR128:$dst, VR128:$src1, i128mem:$src2), "punpckldq {$src2, 
$dst|$dst, $src2}", - [(set VR128:$dst, (v4i32 (X86unpckl VR128:$src1, - (load addr:$src2))))]>; + [(set VR128:$dst, + (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2), + UNPCKL_shuffle_mask)))]>; def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "punpcklqdq {$src2, $dst|$dst, $src2}", []>;