diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4d678342764..eccb5468f95 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -64,17 +64,6 @@ static cl::opt<bool> UseRegMask("x86-use-regmask", static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); -static SDValue Insert128BitVector(SDValue Result, - SDValue Vec, - SDValue Idx, - SelectionDAG &DAG, - DebugLoc dl); - -static SDValue Extract128BitVector(SDValue Vec, - SDValue Idx, - SelectionDAG &DAG, - DebugLoc dl); - /// Generate a DAG to grab 128-bits from a vector > 128 bits. This /// sets things up to match to an AVX VEXTRACTF128 instruction or a /// simple subregister reference. Idx is an index in the 128 bits we @@ -9157,6 +9146,43 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } +// getTargetVShiftNode - Handle vector element shifts where the shift amount +// may or may not be a constant. Takes immediate version of shift as input. 
+static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, + SDValue SrcOp, SDValue ShAmt, + SelectionDAG &DAG) { + assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32"); + + if (isa<ConstantSDNode>(ShAmt)) { + switch (Opc) { + default: llvm_unreachable("Unknown target vector shift node"); + case X86ISD::VSHLI: + case X86ISD::VSRLI: + case X86ISD::VSRAI: + return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); + } + } + + // Change opcode to non-immediate version + switch (Opc) { + default: llvm_unreachable("Unknown target vector shift node"); + case X86ISD::VSHLI: Opc = X86ISD::VSHL; break; + case X86ISD::VSRLI: Opc = X86ISD::VSRL; break; + case X86ISD::VSRAI: Opc = X86ISD::VSRA; break; + } + + // Build a v4i32 from the i32 amount; the shifts read 64 bits, so zero the + // second element. The count operand is always 128 bits, so bitcast to that. + SDValue ShOps[4] = { ShAmt, DAG.getConstant(0, MVT::i32), + DAG.getUNDEF(MVT::i32), DAG.getUNDEF(MVT::i32) }; + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4); + EVT EltVT = VT.getVectorElementType(); + EVT ShVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + 128 / EltVT.getSizeInBits()); + ShAmt = DAG.getNode(ISD::BITCAST, dl, ShVT, ShAmt); + return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); +} + SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); @@ -9359,24 +9385,53 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } - // Fix vector shift instructions where the last operand is a non-immediate - // i32 value. 
- case Intrinsic::x86_avx2_pslli_w: - case Intrinsic::x86_avx2_pslli_d: - case Intrinsic::x86_avx2_pslli_q: - case Intrinsic::x86_avx2_psrli_w: - case Intrinsic::x86_avx2_psrli_d: - case Intrinsic::x86_avx2_psrli_q: - case Intrinsic::x86_avx2_psrai_w: - case Intrinsic::x86_avx2_psrai_d: + // SSE/AVX shift intrinsics + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_avx2_psra_d: + return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); case Intrinsic::x86_sse2_pslli_w: case Intrinsic::x86_sse2_pslli_d: case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrli_w: case Intrinsic::x86_sse2_psrli_d: case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), DAG); case Intrinsic::x86_sse2_psrai_w: case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + return getTargetVShiftNode(X86ISD::VSRAI, dl, 
Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), DAG); + // Fix vector shift instructions where the last operand is a non-immediate + // i32 value. case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: @@ -9390,103 +9445,40 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const return SDValue(); unsigned NewIntNo = 0; - EVT ShAmtVT = MVT::v4i32; switch (IntNo) { - case Intrinsic::x86_sse2_pslli_w: - NewIntNo = Intrinsic::x86_sse2_psll_w; + case Intrinsic::x86_mmx_pslli_w: + NewIntNo = Intrinsic::x86_mmx_psll_w; break; - case Intrinsic::x86_sse2_pslli_d: - NewIntNo = Intrinsic::x86_sse2_psll_d; + case Intrinsic::x86_mmx_pslli_d: + NewIntNo = Intrinsic::x86_mmx_psll_d; break; - case Intrinsic::x86_sse2_pslli_q: - NewIntNo = Intrinsic::x86_sse2_psll_q; + case Intrinsic::x86_mmx_pslli_q: + NewIntNo = Intrinsic::x86_mmx_psll_q; break; - case Intrinsic::x86_sse2_psrli_w: - NewIntNo = Intrinsic::x86_sse2_psrl_w; + case Intrinsic::x86_mmx_psrli_w: + NewIntNo = Intrinsic::x86_mmx_psrl_w; break; - case Intrinsic::x86_sse2_psrli_d: - NewIntNo = Intrinsic::x86_sse2_psrl_d; + case Intrinsic::x86_mmx_psrli_d: + NewIntNo = Intrinsic::x86_mmx_psrl_d; break; - case Intrinsic::x86_sse2_psrli_q: - NewIntNo = Intrinsic::x86_sse2_psrl_q; + case Intrinsic::x86_mmx_psrli_q: + NewIntNo = Intrinsic::x86_mmx_psrl_q; break; - case Intrinsic::x86_sse2_psrai_w: - NewIntNo = Intrinsic::x86_sse2_psra_w; + case Intrinsic::x86_mmx_psrai_w: + NewIntNo = Intrinsic::x86_mmx_psra_w; break; - case Intrinsic::x86_sse2_psrai_d: - NewIntNo = Intrinsic::x86_sse2_psra_d; + case Intrinsic::x86_mmx_psrai_d: + NewIntNo = Intrinsic::x86_mmx_psra_d; break; - case Intrinsic::x86_avx2_pslli_w: - NewIntNo = Intrinsic::x86_avx2_psll_w; - break; - case Intrinsic::x86_avx2_pslli_d: - NewIntNo = Intrinsic::x86_avx2_psll_d; - break; - case Intrinsic::x86_avx2_pslli_q: - NewIntNo = Intrinsic::x86_avx2_psll_q; - break; - case 
Intrinsic::x86_avx2_psrli_w: - NewIntNo = Intrinsic::x86_avx2_psrl_w; - break; - case Intrinsic::x86_avx2_psrli_d: - NewIntNo = Intrinsic::x86_avx2_psrl_d; - break; - case Intrinsic::x86_avx2_psrli_q: - NewIntNo = Intrinsic::x86_avx2_psrl_q; - break; - case Intrinsic::x86_avx2_psrai_w: - NewIntNo = Intrinsic::x86_avx2_psra_w; - break; - case Intrinsic::x86_avx2_psrai_d: - NewIntNo = Intrinsic::x86_avx2_psra_d; - break; - default: { - ShAmtVT = MVT::v2i32; - switch (IntNo) { - case Intrinsic::x86_mmx_pslli_w: - NewIntNo = Intrinsic::x86_mmx_psll_w; - break; - case Intrinsic::x86_mmx_pslli_d: - NewIntNo = Intrinsic::x86_mmx_psll_d; - break; - case Intrinsic::x86_mmx_pslli_q: - NewIntNo = Intrinsic::x86_mmx_psll_q; - break; - case Intrinsic::x86_mmx_psrli_w: - NewIntNo = Intrinsic::x86_mmx_psrl_w; - break; - case Intrinsic::x86_mmx_psrli_d: - NewIntNo = Intrinsic::x86_mmx_psrl_d; - break; - case Intrinsic::x86_mmx_psrli_q: - NewIntNo = Intrinsic::x86_mmx_psrl_q; - break; - case Intrinsic::x86_mmx_psrai_w: - NewIntNo = Intrinsic::x86_mmx_psra_w; - break; - case Intrinsic::x86_mmx_psrai_d: - NewIntNo = Intrinsic::x86_mmx_psra_d; - break; - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - } - break; - } + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. } // The vector shift intrinsics with scalars uses 32b shift amounts but // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits // to be zero. - SDValue ShOps[4]; - ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, MVT::i32); - if (ShAmtVT == MVT::v4i32) { - ShOps[2] = DAG.getUNDEF(MVT::i32); - ShOps[3] = DAG.getUNDEF(MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4); - } else { - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt, + DAG.getConstant(0, MVT::i32)); // FIXME this must be lowered to get rid of the invalid type. 
- } EVT VT = Op.getValueType(); ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); @@ -10006,43 +9998,6 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { return Res; } -// getTargetVShiftNOde - Handle vector element shifts where the shift amount -// may or may not be a constant. Takes immediate version of shift as input. -static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, - SDValue SrcOp, SDValue ShAmt, - SelectionDAG &DAG) { - assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32"); - - if (isa(ShAmt)) { - switch (Opc) { - default: llvm_unreachable("Unknown target vector shift node"); - case X86ISD::VSHLI: - case X86ISD::VSRLI: - case X86ISD::VSRAI: - return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); - } - } - - // Change opcode to non-immediate version - switch (Opc) { - default: llvm_unreachable("Unknown target vector shift node"); - case X86ISD::VSHLI: Opc = X86ISD::VSHL; break; - case X86ISD::VSRLI: Opc = X86ISD::VSRL; break; - case X86ISD::VSRAI: Opc = X86ISD::VSRA; break; - } - - // Need to build a vector containing shift amount - // Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0 - SDValue ShOps[4]; - ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, MVT::i32); - ShOps[2] = DAG.getUNDEF(MVT::i32); - ShOps[3] = DAG.getUNDEF(MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4); - ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt); - return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); -} - SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index da0fccc6e59..ff1062716aa 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3511,8 +3511,9 @@ multiclass PDI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, } multiclass PDI_binop_rmi_int opc, bits<8> opc2, Format ImmForm, - string OpcodeStr, Intrinsic IntId, 
- Intrinsic IntId2, RegisterClass RC, + string OpcodeStr, SDNode OpNode, + SDNode OpNode2, RegisterClass RC, + ValueType DstVT, ValueType SrcVT, PatFrag bc_frag, bit Is2Addr = 1> { // src2 is always 128-bit def rr : PDI opc, bits<8> opc2, Format ImmForm, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (IntId RC:$src1, VR128:$src2))]>; + [(set RC:$dst, (OpNode (DstVT RC:$src1), (SrcVT VR128:$src2)))]>; def rm : PDI; + [(set RC:$dst, (OpNode (DstVT RC:$src1), + (bc_frag (memopv2i64 addr:$src2))))]>; def ri : PDIi8; + [(set RC:$dst, (OpNode2 (DstVT RC:$src1), (i32 imm:$src2)))]>; } } // ExeDomain = SSEPackedInt @@ -3728,32 +3730,24 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, //===---------------------------------------------------------------------===// let Predicates = [HasAVX] in { -defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", - int_x86_sse2_psll_w, int_x86_sse2_pslli_w, - VR128, 0>, VEX_4V; -defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", - int_x86_sse2_psll_d, int_x86_sse2_pslli_d, - VR128, 0>, VEX_4V; -defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", - int_x86_sse2_psll_q, int_x86_sse2_pslli_q, - VR128, 0>, VEX_4V; +defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, + VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, + VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, + VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V; -defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", - int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, - VR128, 0>, VEX_4V; -defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", - int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, - VR128, 0>, VEX_4V; -defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, 
"vpsrlq", - int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, - VR128, 0>, VEX_4V; +defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, + VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, + VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, + VR128, v2i64, v2i64, bc_v2i64, 0>, VEX_4V; -defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", - int_x86_sse2_psra_w, int_x86_sse2_psrai_w, - VR128, 0>, VEX_4V; -defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", - int_x86_sse2_psra_d, int_x86_sse2_psrai_d, - VR128, 0>, VEX_4V; +defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, + VR128, v8i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, + VR128, v4i32, v4i32, bc_v4i32, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { // 128-bit logical shifts. 
@@ -3774,32 +3768,24 @@ let ExeDomain = SSEPackedInt in { } // Predicates = [HasAVX] let Predicates = [HasAVX2] in { -defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", - int_x86_avx2_psll_w, int_x86_avx2_pslli_w, - VR256, 0>, VEX_4V; -defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", - int_x86_avx2_psll_d, int_x86_avx2_pslli_d, - VR256, 0>, VEX_4V; -defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", - int_x86_avx2_psll_q, int_x86_avx2_pslli_q, - VR256, 0>, VEX_4V; +defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, + VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, + VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, + VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V; -defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", - int_x86_avx2_psrl_w, int_x86_avx2_psrli_w, - VR256, 0>, VEX_4V; -defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", - int_x86_avx2_psrl_d, int_x86_avx2_psrli_d, - VR256, 0>, VEX_4V; -defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", - int_x86_avx2_psrl_q, int_x86_avx2_psrli_q, - VR256, 0>, VEX_4V; +defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, + VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, + VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V; +defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, + VR256, v4i64, v2i64, bc_v2i64, 0>, VEX_4V; -defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", - int_x86_avx2_psra_w, int_x86_avx2_psrai_w, - VR256, 0>, VEX_4V; -defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", - int_x86_avx2_psra_d, int_x86_avx2_psrai_d, - VR256, 0>, VEX_4V; +defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, + 
VR256, v16i16, v8i16, bc_v8i16, 0>, VEX_4V; +defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, + VR256, v8i32, v4i32, bc_v4i32, 0>, VEX_4V; let ExeDomain = SSEPackedInt in { // 256-bit logical shifts. @@ -3820,32 +3806,24 @@ let ExeDomain = SSEPackedInt in { } // Predicates = [HasAVX2] let Constraints = "$src1 = $dst" in { -defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", - int_x86_sse2_psll_w, int_x86_sse2_pslli_w, - VR128>; -defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", - int_x86_sse2_psll_d, int_x86_sse2_pslli_d, - VR128>; -defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", - int_x86_sse2_psll_q, int_x86_sse2_pslli_q, - VR128>; +defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, + VR128, v8i16, v8i16, bc_v8i16>; +defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, + VR128, v4i32, v4i32, bc_v4i32>; +defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, + VR128, v2i64, v2i64, bc_v2i64>; -defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", - int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, - VR128>; -defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", - int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, - VR128>; -defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", - int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, - VR128>; +defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, + VR128, v8i16, v8i16, bc_v8i16>; +defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, + VR128, v4i32, v4i32, bc_v4i32>; +defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, + VR128, v2i64, v2i64, bc_v2i64>; -defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", - int_x86_sse2_psra_w, int_x86_sse2_psrai_w, - VR128>; -defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", - int_x86_sse2_psra_d, int_x86_sse2_psrai_d, - VR128>; +defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, 
MRM4r, "psraw", X86vsra, X86vsrai, + VR128, v8i16, v8i16, bc_v8i16>; +defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, + VR128, v4i32, v4i32, bc_v4i32>; let ExeDomain = SSEPackedInt in { // 128-bit logical shifts. @@ -3876,60 +3854,6 @@ let Predicates = [HasAVX] in { (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; - - def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))), - (VPSLLWri VR128:$src1, imm:$src2)>; - def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))), - (VPSLLDri VR128:$src1, imm:$src2)>; - def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))), - (VPSLLQri VR128:$src1, imm:$src2)>; - - def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))), - (VPSRLWri VR128:$src1, imm:$src2)>; - def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))), - (VPSRLDri VR128:$src1, imm:$src2)>; - def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))), - (VPSRLQri VR128:$src1, imm:$src2)>; - - def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))), - (VPSRAWri VR128:$src1, imm:$src2)>; - def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))), - (VPSRADri VR128:$src1, imm:$src2)>; - - def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))), - (VPSLLWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPSLLWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))), - (VPSLLDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPSLLDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))), - (VPSLLQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))), - (VPSLLQrm VR128:$src1, addr:$src2)>; - - def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))), - (VPSRLWrr VR128:$src1, 
VR128:$src2)>; - def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPSRLWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))), - (VPSRLDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPSRLDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))), - (VPSRLQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))), - (VPSRLQrm VR128:$src1, addr:$src2)>; - - def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))), - (VPSRAWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPSRAWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))), - (VPSRADrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPSRADrm VR128:$src1, addr:$src2)>; } let Predicates = [HasAVX2] in { @@ -3937,60 +3861,6 @@ let Predicates = [HasAVX2] in { (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2), (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; - - def : Pat<(v16i16 (X86vshli VR256:$src1, (i32 imm:$src2))), - (VPSLLWYri VR256:$src1, imm:$src2)>; - def : Pat<(v8i32 (X86vshli VR256:$src1, (i32 imm:$src2))), - (VPSLLDYri VR256:$src1, imm:$src2)>; - def : Pat<(v4i64 (X86vshli VR256:$src1, (i32 imm:$src2))), - (VPSLLQYri VR256:$src1, imm:$src2)>; - - def : Pat<(v16i16 (X86vsrli VR256:$src1, (i32 imm:$src2))), - (VPSRLWYri VR256:$src1, imm:$src2)>; - def : Pat<(v8i32 (X86vsrli VR256:$src1, (i32 imm:$src2))), - (VPSRLDYri VR256:$src1, imm:$src2)>; - def : Pat<(v4i64 (X86vsrli VR256:$src1, (i32 imm:$src2))), - (VPSRLQYri VR256:$src1, imm:$src2)>; - - def : Pat<(v16i16 (X86vsrai VR256:$src1, (i32 imm:$src2))), - (VPSRAWYri VR256:$src1, imm:$src2)>; - def : Pat<(v8i32 (X86vsrai 
VR256:$src1, (i32 imm:$src2))), - (VPSRADYri VR256:$src1, imm:$src2)>; - - def : Pat<(v16i16 (X86vshl VR256:$src1, (v8i16 VR128:$src2))), - (VPSLLWYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v16i16 (X86vshl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPSLLWYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86vshl VR256:$src1, (v4i32 VR128:$src2))), - (VPSLLDYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v8i32 (X86vshl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPSLLDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86vshl VR256:$src1, (v2i64 VR128:$src2))), - (VPSLLQYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v4i64 (X86vshl VR256:$src1, (memopv2i64 addr:$src2))), - (VPSLLQYrm VR256:$src1, addr:$src2)>; - - def : Pat<(v16i16 (X86vsrl VR256:$src1, (v8i16 VR128:$src2))), - (VPSRLWYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v16i16 (X86vsrl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPSRLWYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86vsrl VR256:$src1, (v4i32 VR128:$src2))), - (VPSRLDYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v8i32 (X86vsrl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPSRLDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86vsrl VR256:$src1, (v2i64 VR128:$src2))), - (VPSRLQYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v4i64 (X86vsrl VR256:$src1, (memopv2i64 addr:$src2))), - (VPSRLQYrm VR256:$src1, addr:$src2)>; - - def : Pat<(v16i16 (X86vsra VR256:$src1, (v8i16 VR128:$src2))), - (VPSRAWYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v16i16 (X86vsra VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (VPSRAWYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86vsra VR256:$src1, (v4i32 VR128:$src2))), - (VPSRADYrr VR256:$src1, VR128:$src2)>; - def : Pat<(v8i32 (X86vsra VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (VPSRADYrm VR256:$src1, addr:$src2)>; } let Predicates = [HasSSE2] in { @@ -4006,60 +3876,6 @@ let Predicates = [HasSSE2] in { (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; def : 
Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; - - def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))), - (PSLLWri VR128:$src1, imm:$src2)>; - def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))), - (PSLLDri VR128:$src1, imm:$src2)>; - def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))), - (PSLLQri VR128:$src1, imm:$src2)>; - - def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))), - (PSRLWri VR128:$src1, imm:$src2)>; - def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))), - (PSRLDri VR128:$src1, imm:$src2)>; - def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))), - (PSRLQri VR128:$src1, imm:$src2)>; - - def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))), - (PSRAWri VR128:$src1, imm:$src2)>; - def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))), - (PSRADri VR128:$src1, imm:$src2)>; - - def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))), - (PSLLWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (PSLLWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))), - (PSLLDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (PSLLDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))), - (PSLLQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))), - (PSLLQrm VR128:$src1, addr:$src2)>; - - def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))), - (PSRLWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (PSRLWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))), - (PSRLDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (PSRLDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 
(X86vsrl VR128:$src1, (v2i64 VR128:$src2))), - (PSRLQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))), - (PSRLQrm VR128:$src1, addr:$src2)>; - - def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))), - (PSRAWrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))), - (PSRAWrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))), - (PSRADrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))), - (PSRADrm VR128:$src1, addr:$src2)>; } //===---------------------------------------------------------------------===//