Mirror of https://github.com/c64scene-ar/llvm-6502.git, synced 2025-03-21 03:32:29 +00:00
Add target specific ISD node types for SSE/AVX vector shift instructions and change all the code that used to create intrinsic nodes to create the new nodes instead.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148664 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent 4a309f3d25
commit ed2e13d667
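For illustration (not part of the commit): the core of the change is that immediate vector shifts are now lowered to dedicated target nodes rather than generic intrinsic nodes. Below is a minimal sketch, assuming the LLVM 3.1-era SelectionDAG API used throughout this diff; the helper names are hypothetical.

// Sketch only. In-tree context assumed:
//   #include "X86ISelLowering.h"            // X86ISD::VSHLI
//   #include "llvm/CodeGen/SelectionDAG.h"
//   #include "llvm/Intrinsics.h"            // Intrinsic::x86_sse2_pslli_w

// Old style: the shift is hidden behind a generic intrinsic node keyed by the
// intrinsic ID, which DAG combines and patterns must special-case per ID.
static SDValue lowerShlByImmOld(SelectionDAG &DAG, DebugLoc dl, EVT VT,
                                SDValue R, uint64_t ShiftAmt) {
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                     DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
                     R, DAG.getConstant(ShiftAmt, MVT::i32));
}

// New style: emit the target-specific node added by this commit; the new
// TableGen fragments (X86vshli, X86vsrli, X86vsrai) select it directly for
// the SSE and AVX2 vector types.
static SDValue lowerShlByImmNew(SelectionDAG &DAG, DebugLoc dl, EVT VT,
                                SDValue R, uint64_t ShiftAmt) {
  return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
                     DAG.getConstant(ShiftAmt, MVT::i32));
}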
@@ -4789,7 +4789,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
const TargetLowering &TLI, DebugLoc dl) {
assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
EVT ShVT = MVT::v2i64;
unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opc, dl, ShVT, SrcOp,
@@ -6587,7 +6587,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return CommuteVectorShuffle(SVOp, DAG);

if (isShift) {
// No better options. Use a vshl / vsrl.
// No better options. Use a vshldq / vsrldq.
EVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
@@ -10010,6 +10010,43 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
return Res;
}

// getTargetVShiftNode - Handle vector element shifts where the shift amount
// may or may not be a constant. Takes immediate version of shift as input.
static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
SDValue SrcOp, SDValue ShAmt,
SelectionDAG &DAG) {
assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");

if (isa<ConstantSDNode>(ShAmt)) {
switch (Opc) {
default: llvm_unreachable("Unknown target vector shift node");
case X86ISD::VSHLI:
case X86ISD::VSRLI:
case X86ISD::VSRAI:
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
}

// Change opcode to non-immediate version
switch (Opc) {
default: llvm_unreachable("Unknown target vector shift node");
case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
}

// Need to build a vector containing shift amount
// Shift amount is 32-bits, but SSE instructions read 64-bit, so fill with 0
SDValue ShOps[4];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
ShOps[2] = DAG.getUNDEF(MVT::i32);
ShOps[3] = DAG.getUNDEF(MVT::i32);
ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}

SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {

EVT VT = Op.getValueType();
@@ -10027,112 +10064,86 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
uint64_t ShiftAmt = C->getZExtValue();

if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));
// Zero out the rightmost bits.
SmallVector<SDValue, 16> V(16, DAG.getConstant(uint8_t(-1U << ShiftAmt),
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasAVX2() &&
(VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
if (Op.getOpcode() == ISD::SHL)
return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
DAG.getConstant(ShiftAmt, MVT::i32));
if (Op.getOpcode() == ISD::SRL)
return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
DAG.getConstant(ShiftAmt, MVT::i32));
if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
DAG.getConstant(ShiftAmt, MVT::i32));
}

if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));

if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SHL)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));

if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SHL)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));

if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRL) {
// Make a large shift.
SDValue SRL =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));
// Zero out the leftmost bits.
SmallVector<SDValue, 16> V(16, DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
}

if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));

if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRL)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));

if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRL)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));

if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRA)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));

if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRA)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));

if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRA) {
if (ShiftAmt == 7) {
// R s>> 7 === R s< 0
SDValue Zeros = getZeroVector(VT, /* HasSSE2 */true,
/* HasAVX2 */false, DAG, dl);
return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
if (VT == MVT::v16i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, R,
DAG.getConstant(ShiftAmt, MVT::i32));
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 16> V(16,
DAG.getConstant(uint8_t(-1U << ShiftAmt),
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v8i16, R,
DAG.getConstant(ShiftAmt, MVT::i32));
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 16> V(16,
DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
}
if (Op.getOpcode() == ISD::SRA) {
if (ShiftAmt == 7) {
// R s>> 7 === R s< 0
SDValue Zeros = getZeroVector(VT, /* HasSSE2 */true,
/* HasAVX2 */false, DAG, dl);
return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R);
}

// R s>> a === ((R u>> a) ^ m) - m
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
MVT::i8));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
// R s>> a === ((R u>> a) ^ m) - m
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
MVT::i8));
SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
}
}

if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
if (Op.getOpcode() == ISD::SHL) {
// Make a large shift.
SDValue SHL =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));
SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
DAG.getConstant(ShiftAmt, MVT::i32));
SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
// Zero out the rightmost bits.
SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U << ShiftAmt),
MVT::i8));
SmallVector<SDValue, 32> V(32,
DAG.getConstant(uint8_t(-1U << ShiftAmt),
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
SDValue SRL =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
R, DAG.getConstant(ShiftAmt, MVT::i32));
SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v16i16, R,
DAG.getConstant(ShiftAmt, MVT::i32));
SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
// Zero out the leftmost bits.
SmallVector<SDValue, 32> V(32, DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
MVT::i8));
SmallVector<SDValue, 32> V(32,
DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
}
@@ -10159,9 +10170,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {

// Lower SHL with variable shift amount.
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
Op.getOperand(1), DAG.getConstant(23, MVT::i32));
Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1),
DAG.getConstant(23, MVT::i32));

ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
@@ -10181,47 +10191,41 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");

// a = a << 5;
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
Op.getOperand(1), DAG.getConstant(5, MVT::i32));
Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1),
DAG.getConstant(5, MVT::i32));
Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);

// Turn 'a' into a mask suitable for VSELECT
SDValue VSelM = DAG.getConstant(0x80, VT);
SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
OpVSel, VSelM);
OpVSel = DAG.getNode(X86ISD::PCMPEQB, dl, VT, OpVSel, VSelM);

SDValue CM1 = DAG.getConstant(0x0f, VT);
SDValue CM2 = DAG.getConstant(0x3f, VT);

// r = VSELECT(r, psllw(r & (char16)15, 4), a);
SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(4, MVT::i32));
M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
DAG.getConstant(4, MVT::i32), DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);

// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
OpVSel, VSelM);
OpVSel = DAG.getNode(X86ISD::PCMPEQB, dl, VT, OpVSel, VSelM);

// r = VSELECT(r, psllw(r & (char16)63, 2), a);
M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
DAG.getConstant(2, MVT::i32));
M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
DAG.getConstant(2, MVT::i32), DAG);
M = DAG.getNode(ISD::BITCAST, dl, VT, M);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);

// a += a
Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
OpVSel = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(Intrinsic::x86_sse2_pcmpeq_b, MVT::i32),
OpVSel, VSelM);
OpVSel = DAG.getNode(X86ISD::PCMPEQB, dl, VT, OpVSel, VSelM);

// return VSELECT(r, r+r, a);
R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
@@ -10231,7 +10235,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {

// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.getSizeInBits() == 256) {
int NumElems = VT.getVectorNumElements();
unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
@@ -10246,9 +10250,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
// Constant shift amount
SmallVector<SDValue, 4> Amt1Csts;
SmallVector<SDValue, 4> Amt2Csts;
for (int i = 0; i < NumElems/2; ++i)
for (unsigned i = 0; i != NumElems/2; ++i)
Amt1Csts.push_back(Amt->getOperand(i));
for (int i = NumElems/2; i < NumElems; ++i)
for (unsigned i = NumElems/2; i != NumElems; ++i)
Amt2Csts.push_back(Amt->getOperand(i));

Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
@@ -10354,72 +10358,52 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
EVT VT = Op.getValueType();

if (Subtarget->hasSSE2() && VT.isVector()) {
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
if (!Subtarget->hasSSE2() || !VT.isVector())
return SDValue();

unsigned SHLIntrinsicsID = 0;
unsigned SRAIntrinsicsID = 0;
switch (VT.getSimpleVT().SimpleTy) {
default:
unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
ExtraVT.getScalarType().getSizeInBits();
SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);

switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
case MVT::v8i32:
case MVT::v16i16:
if (!Subtarget->hasAVX())
return SDValue();
case MVT::v4i32:
SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
break;
case MVT::v8i16:
SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
break;
case MVT::v8i32:
case MVT::v16i16:
if (!Subtarget->hasAVX())
return SDValue();
if (!Subtarget->hasAVX2()) {
// needs to be split
int NumElems = VT.getVectorNumElements();
SDValue Idx0 = DAG.getConstant(0, MVT::i32);
SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
if (!Subtarget->hasAVX2()) {
// needs to be split
int NumElems = VT.getVectorNumElements();
SDValue Idx0 = DAG.getConstant(0, MVT::i32);
SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);

// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
// Extract the LHS vectors
SDValue LHS = Op.getOperand(0);
SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);

MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);

EVT ExtraEltVT = ExtraVT.getVectorElementType();
int ExtraNumElems = ExtraVT.getVectorNumElements();
ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
ExtraNumElems/2);
SDValue Extra = DAG.getValueType(ExtraVT);
EVT ExtraEltVT = ExtraVT.getVectorElementType();
int ExtraNumElems = ExtraVT.getVectorNumElements();
ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
ExtraNumElems/2);
SDValue Extra = DAG.getValueType(ExtraVT);

LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);

return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);;
}
if (VT == MVT::v8i32) {
SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_d;
SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_d;
} else {
SHLIntrinsicsID = Intrinsic::x86_avx2_pslli_w;
SRAIntrinsicsID = Intrinsic::x86_avx2_psrai_w;
}
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);;
}
// fall through
case MVT::v4i32:
case MVT::v8i16: {
SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
Op.getOperand(0), ShAmt, DAG);
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
}

SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(SHLIntrinsicsID, MVT::i32),
Op.getOperand(0), ShAmt);

return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
DAG.getConstant(SRAIntrinsicsID, MVT::i32),
Tmp1, ShAmt);
}

return SDValue();
}
@@ -10951,8 +10935,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
case X86ISD::VSRL: return "X86ISD::VSRL";
case X86ISD::VSRA: return "X86ISD::VSRA";
case X86ISD::VSHLI: return "X86ISD::VSHLI";
case X86ISD::VSRLI: return "X86ISD::VSRLI";
case X86ISD::VSRAI: return "X86ISD::VSRAI";
case X86ISD::CMPPD: return "X86ISD::CMPPD";
case X86ISD::CMPPS: return "X86ISD::CMPPS";
case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB";
@@ -13485,77 +13475,37 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
default:
llvm_unreachable("Unknown shift opcode!");
case ISD::SHL:
if (VT == MVT::v2i64)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v4i32)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v8i16)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v4i64)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v8i32)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v16i16)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32),
ValOp, BaseShAmt);
break;
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
case MVT::v2i64:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v4i64:
case MVT::v8i32:
case MVT::v16i16:
return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG);
}
case ISD::SRA:
if (VT == MVT::v4i32)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v8i16)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v8i32)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v16i16)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32),
ValOp, BaseShAmt);
break;
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
case MVT::v4i32:
case MVT::v8i16:
case MVT::v8i32:
case MVT::v16i16:
return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG);
}
case ISD::SRL:
if (VT == MVT::v2i64)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v4i32)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v8i16)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v4i64)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v8i32)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32),
ValOp, BaseShAmt);
if (VT == MVT::v16i16)
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32),
ValOp, BaseShAmt);
break;
switch (VT.getSimpleVT().SimpleTy) {
default: return SDValue();
case MVT::v2i64:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v4i64:
case MVT::v8i32:
case MVT::v16i16:
return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG);
}
}
return SDValue();
}
@@ -13791,24 +13741,29 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
Mask = Mask.getOperand(0);
EVT MaskVT = Mask.getValueType();

// Validate that the Mask operand is a vector sra node. The sra node
// will be an intrinsic.
if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
return SDValue();

// Validate that the Mask operand is a vector sra node.
// FIXME: what to do for bytes, since there is a psignb/pblendvb, but
// there is no psrai.b
switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:
break;
default: return SDValue();
}
SDValue SraSrc, SraC;
if (Mask.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:
break;
default: return SDValue();
}

SraSrc = Mask.getOperand(1);
SraC = Mask.getOperand(2);
} else if (Mask.getOpcode() == X86ISD::VSRAI) {
SraSrc = Mask.getOperand(0);
SraC = Mask.getOperand(1);
} else
return SDValue();

// Check that the SRA is all signbits.
SDValue SraC = Mask.getOperand(2);
unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
if ((SraAmt + 1) != EltBits)
@@ -13823,11 +13778,11 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
Y = Y.getOperand(0);
if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
X.getValueType() == MaskVT && X.getValueType() == Y.getValueType() &&
(EltBits == 8 || EltBits == 16 || EltBits == 32)) {
SDValue Sign = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X,
Mask.getOperand(1));
return DAG.getNode(ISD::BITCAST, DL, VT, Sign);
X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
"Unsupported VT for PSIGN");
Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, SraSrc);
return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
// PBLENDVB only available on SSE 4.1
if (!Subtarget->hasSSE41())
@@ -219,8 +219,14 @@ namespace llvm {
// VZEXT_MOVL - Vector move low and zero extend.
VZEXT_MOVL,

// VSHL, VSRL - Vector logical left / right shift.
VSHL, VSRL,
// VSHL, VSRL - 128-bit vector logical left / right shift
VSHLDQ, VSRLDQ,

// VSHL, VSRL, VSRA - Vector shift elements
VSHL, VSRL, VSRA,

// VSHLI, VSRLI, VSRAI - Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,

// CMPPD, CMPPS - Vector double/float comparison.
// CMPPD, CMPPS - Vector double/float comparison.
@@ -73,8 +73,8 @@ def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
@@ -86,6 +86,20 @@ def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;

def X86vshl : SDNode<"X86ISD::VSHL",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>]>>;
def X86vsrl : SDNode<"X86ISD::VSRL",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>]>>;
def X86vsra : SDNode<"X86ISD::VSRA",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>]>>;

def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;

def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
@@ -3884,10 +3884,64 @@ let Predicates = [HasAVX] in {
(VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;

// Shift up / down and insert zero's.
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
(VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
(VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;

def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))),
(VPSLLWri VR128:$src1, imm:$src2)>;
def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))),
(VPSLLDri VR128:$src1, imm:$src2)>;
def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))),
(VPSLLQri VR128:$src1, imm:$src2)>;

def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))),
(VPSRLWri VR128:$src1, imm:$src2)>;
def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))),
(VPSRLDri VR128:$src1, imm:$src2)>;
def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))),
(VPSRLQri VR128:$src1, imm:$src2)>;

def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))),
(VPSRAWri VR128:$src1, imm:$src2)>;
def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))),
(VPSRADri VR128:$src1, imm:$src2)>;

def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))),
(VPSLLWrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
(VPSLLWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))),
(VPSLLDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
(VPSLLDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))),
(VPSLLQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))),
(VPSLLQrm VR128:$src1, addr:$src2)>;

def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))),
(VPSRLWrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
(VPSRLWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))),
(VPSRLDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
(VPSRLDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))),
(VPSRLQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))),
(VPSRLQrm VR128:$src1, addr:$src2)>;

def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))),
(VPSRAWrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
(VPSRAWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))),
(VPSRADrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
(VPSRADrm VR128:$src1, addr:$src2)>;
}

let Predicates = [HasAVX2] in {
@@ -3895,6 +3949,60 @@ let Predicates = [HasAVX2] in {
(VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
(VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;

def : Pat<(v16i16 (X86vshli VR256:$src1, (i32 imm:$src2))),
(VPSLLWYri VR256:$src1, imm:$src2)>;
def : Pat<(v8i32 (X86vshli VR256:$src1, (i32 imm:$src2))),
(VPSLLDYri VR256:$src1, imm:$src2)>;
def : Pat<(v4i64 (X86vshli VR256:$src1, (i32 imm:$src2))),
(VPSLLQYri VR256:$src1, imm:$src2)>;

def : Pat<(v16i16 (X86vsrli VR256:$src1, (i32 imm:$src2))),
(VPSRLWYri VR256:$src1, imm:$src2)>;
def : Pat<(v8i32 (X86vsrli VR256:$src1, (i32 imm:$src2))),
(VPSRLDYri VR256:$src1, imm:$src2)>;
def : Pat<(v4i64 (X86vsrli VR256:$src1, (i32 imm:$src2))),
(VPSRLQYri VR256:$src1, imm:$src2)>;

def : Pat<(v16i16 (X86vsrai VR256:$src1, (i32 imm:$src2))),
(VPSRAWYri VR256:$src1, imm:$src2)>;
def : Pat<(v8i32 (X86vsrai VR256:$src1, (i32 imm:$src2))),
(VPSRADYri VR256:$src1, imm:$src2)>;

def : Pat<(v16i16 (X86vshl VR256:$src1, (v8i16 VR128:$src2))),
(VPSLLWYrr VR256:$src1, VR128:$src2)>;
def : Pat<(v16i16 (X86vshl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
(VPSLLWYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86vshl VR256:$src1, (v4i32 VR128:$src2))),
(VPSLLDYrr VR256:$src1, VR128:$src2)>;
def : Pat<(v8i32 (X86vshl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
(VPSLLDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86vshl VR256:$src1, (v2i64 VR128:$src2))),
(VPSLLQYrr VR256:$src1, VR128:$src2)>;
def : Pat<(v4i64 (X86vshl VR256:$src1, (memopv2i64 addr:$src2))),
(VPSLLQYrm VR256:$src1, addr:$src2)>;

def : Pat<(v16i16 (X86vsrl VR256:$src1, (v8i16 VR128:$src2))),
(VPSRLWYrr VR256:$src1, VR128:$src2)>;
def : Pat<(v16i16 (X86vsrl VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
(VPSRLWYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86vsrl VR256:$src1, (v4i32 VR128:$src2))),
(VPSRLDYrr VR256:$src1, VR128:$src2)>;
def : Pat<(v8i32 (X86vsrl VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
(VPSRLDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86vsrl VR256:$src1, (v2i64 VR128:$src2))),
(VPSRLQYrr VR256:$src1, VR128:$src2)>;
def : Pat<(v4i64 (X86vsrl VR256:$src1, (memopv2i64 addr:$src2))),
(VPSRLQYrm VR256:$src1, addr:$src2)>;

def : Pat<(v16i16 (X86vsra VR256:$src1, (v8i16 VR128:$src2))),
(VPSRAWYrr VR256:$src1, VR128:$src2)>;
def : Pat<(v16i16 (X86vsra VR256:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
(VPSRAWYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86vsra VR256:$src1, (v4i32 VR128:$src2))),
(VPSRADYrr VR256:$src1, VR128:$src2)>;
def : Pat<(v8i32 (X86vsra VR256:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
(VPSRADYrm VR256:$src1, addr:$src2)>;
}

let Predicates = [HasSSE2] in {
@@ -3906,10 +4014,64 @@ let Predicates = [HasSSE2] in {
(PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;

// Shift up / down and insert zero's.
def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
(PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
(PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;

def : Pat<(v8i16 (X86vshli VR128:$src1, (i32 imm:$src2))),
(PSLLWri VR128:$src1, imm:$src2)>;
def : Pat<(v4i32 (X86vshli VR128:$src1, (i32 imm:$src2))),
(PSLLDri VR128:$src1, imm:$src2)>;
def : Pat<(v2i64 (X86vshli VR128:$src1, (i32 imm:$src2))),
(PSLLQri VR128:$src1, imm:$src2)>;

def : Pat<(v8i16 (X86vsrli VR128:$src1, (i32 imm:$src2))),
(PSRLWri VR128:$src1, imm:$src2)>;
def : Pat<(v4i32 (X86vsrli VR128:$src1, (i32 imm:$src2))),
(PSRLDri VR128:$src1, imm:$src2)>;
def : Pat<(v2i64 (X86vsrli VR128:$src1, (i32 imm:$src2))),
(PSRLQri VR128:$src1, imm:$src2)>;

def : Pat<(v8i16 (X86vsrai VR128:$src1, (i32 imm:$src2))),
(PSRAWri VR128:$src1, imm:$src2)>;
def : Pat<(v4i32 (X86vsrai VR128:$src1, (i32 imm:$src2))),
(PSRADri VR128:$src1, imm:$src2)>;

def : Pat<(v8i16 (X86vshl VR128:$src1, (v8i16 VR128:$src2))),
(PSLLWrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (X86vshl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
(PSLLWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86vshl VR128:$src1, (v4i32 VR128:$src2))),
(PSLLDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86vshl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
(PSLLDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86vshl VR128:$src1, (v2i64 VR128:$src2))),
(PSLLQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86vshl VR128:$src1, (memopv2i64 addr:$src2))),
(PSLLQrm VR128:$src1, addr:$src2)>;

def : Pat<(v8i16 (X86vsrl VR128:$src1, (v8i16 VR128:$src2))),
(PSRLWrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (X86vsrl VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
(PSRLWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86vsrl VR128:$src1, (v4i32 VR128:$src2))),
(PSRLDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86vsrl VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
(PSRLDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86vsrl VR128:$src1, (v2i64 VR128:$src2))),
(PSRLQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86vsrl VR128:$src1, (memopv2i64 addr:$src2))),
(PSRLQrm VR128:$src1, addr:$src2)>;

def : Pat<(v8i16 (X86vsra VR128:$src1, (v8i16 VR128:$src2))),
(PSRAWrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (X86vsra VR128:$src1, (bc_v8i16 (memopv2i64 addr:$src2)))),
(PSRAWrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86vsra VR128:$src1, (v4i32 VR128:$src2))),
(PSRADrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86vsra VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
(PSRADrm VR128:$src1, addr:$src2)>;
}

//===---------------------------------------------------------------------===//