From ec24e61ab0a22f58fedac374ef0df3e69d9a2587 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 30 Nov 2011 07:47:51 +0000 Subject: [PATCH] Merge VPERM2F128/VPERM2I128 ISD node types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145485 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 33 ++-------- lib/Target/X86/X86ISelLowering.h | 3 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 3 +- lib/Target/X86/X86InstrSSE.td | 82 +++++++++++++------------ 4 files changed, 50 insertions(+), 71 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0b38d556e37..df06cfff96d 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2848,8 +2848,7 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::UNPCKHP: case X86ISD::PUNPCKH: case X86ISD::VPERMILP: - case X86ISD::VPERM2F128: - case X86ISD::VPERM2I128: + case X86ISD::VPERM2X128: return true; } return false; @@ -2889,8 +2888,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::PALIGN: case X86ISD::SHUFPD: case X86ISD::SHUFPS: - case X86ISD::VPERM2F128: - case X86ISD::VPERM2I128: + case X86ISD::VPERM2X128: return DAG.getNode(Opc, dl, VT, V1, V2, DAG.getConstant(TargetMask, MVT::i8)); } @@ -4616,8 +4614,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, DecodeVPERMILPMask(VT, cast(ImmN)->getZExtValue(), ShuffleMask); break; - case X86ISD::VPERM2F128: - case X86ISD::VPERM2I128: + case X86ISD::VPERM2X128: ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2F128Mask(VT, cast(ImmN)->getZExtValue(), ShuffleMask); @@ -6521,22 +6518,6 @@ static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { return 0; } -static inline unsigned getVPERM2X128Opcode(EVT VT, bool HasAVX2) { - switch(VT.getSimpleVT().SimpleTy) { - case MVT::v32i8: - case MVT::v16i16: - case MVT::v8i32: - case MVT::v4i64: - if (HasAVX2) return X86ISD::VPERM2I128; - // else use fp unit for int vperm - case MVT::v8f32: - case MVT::v4f64: return X86ISD::VPERM2F128; - default: - llvm_unreachable("Unknown type for vpermil"); - } - return 0; -} - static SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI, @@ -6858,7 +6839,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // Handle VPERM2F128/VPERM2I128 permutations if (isVPERM2X128Mask(M, VT, Subtarget->hasAVX())) - return getTargetShuffleNode(getVPERM2X128Opcode(VT, HasAVX2), dl, VT, V1, + return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, V2, getShuffleVPERM2X128Immediate(SVOp), DAG); // Handle VSHUFPS/DY permutations @@ -11157,8 +11138,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PUNPCKH: return "X86ISD::PUNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VPERMILP: return "X86ISD::VPERMILP"; - case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128"; - case X86ISD::VPERM2I128: return "X86ISD::VPERM2I128"; + case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; @@ -14744,8 +14724,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::MOVSS: case X86ISD::MOVSD: case X86ISD::VPERMILP: - case X86ISD::VPERM2F128: - case X86ISD::VPERM2I128: + case X86ISD::VPERM2X128: case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index be801dadc18..dca172fecaf 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -278,8 +278,7 @@ namespace llvm { PUNPCKL, PUNPCKH, VPERMILP, - VPERM2F128, - VPERM2I128, + VPERM2X128, VBROADCAST, // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index b83ca09548c..343c5b7587e 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -138,8 +138,7 @@ def X86Punpckh : SDNode<"X86ISD::PUNPCKH", SDTShuff2Op>; def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>; -def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>; -def X86VPerm2i128 : SDNode<"X86ISD::VPERM2I128", SDTShuff3OpI>; +def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b63b17b36d1..46993efe13b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7322,38 +7322,6 @@ def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), imm:$src3), (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>; -def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; - -def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, - (memopv8f32 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, - (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, - (memopv4i64 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, - (memopv4f64 addr:$src2), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, - (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, - (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))), - (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; - //===----------------------------------------------------------------------===// // VZERO - Zero YMM registers // @@ -7567,29 +7535,63 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), VEX_4V; let Predicates = [HasAVX2] in { -def : Pat<(v8i32 (X86VPerm2i128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v4i64 (X86VPerm2i128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v32i8 (X86VPerm2i128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v16i16 (X86VPerm2i128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; -def : Pat<(v32i8 (X86VPerm2i128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)), +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v16i16 (X86VPerm2i128 VR256:$src1, +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v8i32 (X86VPerm2i128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; -def : Pat<(v4i64 (X86VPerm2i128 VR256:$src1, (memopv4i64 addr:$src2), +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), (i8 imm:$imm))), (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; } +// AVX1 patterns +def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), + (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; + +def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1, + (memopv8f32 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, + (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, + (memopv4i64 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, + (memopv4f64 addr:$src2), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, + (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; +def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, + (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))), + (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; + + //===----------------------------------------------------------------------===// // VINSERTI128 - Insert packed integer values //