mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 00:32:55 +00:00
- More efficient extract_vector_elt with shuffle and movss, movsd, movd, etc.
- Some bug fixes and naming inconsistency fixes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27377 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
630ebaf1f2
commit
11e15b38e9
@ -280,6 +280,7 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
||||
setOperationAction(ISD::LOAD, MVT::v4f32, Legal);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
|
||||
}
|
||||
|
||||
if (Subtarget->hasSSE2()) {
|
||||
@ -316,7 +317,9 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
|
||||
}
|
||||
|
||||
@ -1484,11 +1487,20 @@ bool X86::isSHUFPMask(SDNode *N) {
|
||||
// Dest { 2, 1 } <= shuffle( Dest { 1, 0 }, Src { 3, 2 } )
|
||||
// Expect bit 0 == 1, bit 1 == 2
|
||||
SDOperand Bit0 = N->getOperand(0);
|
||||
if (Bit0.getOpcode() != ISD::UNDEF) {
|
||||
assert(isa<ConstantSDNode>(Bit0) && "Invalid VECTOR_SHUFFLE mask!");
|
||||
if (cast<ConstantSDNode>(Bit0)->getValue() != 1)
|
||||
return false;
|
||||
}
|
||||
|
||||
SDOperand Bit1 = N->getOperand(1);
|
||||
assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
|
||||
"Invalid VECTOR_SHUFFLE mask!");
|
||||
return (cast<ConstantSDNode>(Bit0)->getValue() == 1 &&
|
||||
cast<ConstantSDNode>(Bit1)->getValue() == 2);
|
||||
if (Bit1.getOpcode() != ISD::UNDEF) {
|
||||
assert(isa<ConstantSDNode>(Bit1) && "Invalid VECTOR_SHUFFLE mask!");
|
||||
if (cast<ConstantSDNode>(Bit1)->getValue() != 2)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (NumElems != 4) return false;
|
||||
@ -2660,15 +2672,55 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
||||
return SDOperand();
|
||||
}
|
||||
case ISD::EXTRACT_VECTOR_ELT: {
|
||||
// Transform it so it matches pextrw, which produces a 32-bit result.
|
||||
if (!isa<ConstantSDNode>(Op.getOperand(1)))
|
||||
return SDOperand();
|
||||
|
||||
MVT::ValueType VT = Op.getValueType();
|
||||
if (MVT::getSizeInBits(VT) == 16) {
|
||||
// Transform it so it matches pextrw, which produces a 32-bit result.
|
||||
MVT::ValueType EVT = (MVT::ValueType)(VT+1);
|
||||
SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
|
||||
Op.getOperand(0), Op.getOperand(1));
|
||||
SDOperand Assert = DAG.getNode(ISD::AssertZext, EVT, Extract,
|
||||
DAG.getValueType(VT));
|
||||
return DAG.getNode(ISD::TRUNCATE, VT, Assert);
|
||||
} else if (MVT::getSizeInBits(VT) == 32) {
|
||||
SDOperand Vec = Op.getOperand(0);
|
||||
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
|
||||
if (Idx == 0)
|
||||
return Op;
|
||||
|
||||
// TODO: if Idx == 2, we can use unpckhps
|
||||
// SHUFPS the element to the lowest double word, then movss.
|
||||
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
|
||||
SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4,
|
||||
MVT::getVectorBaseType(MaskVT));
|
||||
std::vector<SDOperand> IdxVec;
|
||||
IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
|
||||
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
|
||||
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
|
||||
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
|
||||
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
|
||||
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
|
||||
Vec, Vec, Mask);
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
|
||||
DAG.getConstant(0, MVT::i32));
|
||||
} else if (MVT::getSizeInBits(VT) == 64) {
|
||||
SDOperand Vec = Op.getOperand(0);
|
||||
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
|
||||
if (Idx == 0)
|
||||
return Op;
|
||||
|
||||
// UNPCKHPD the element to the lowest quadword, then movsd.
|
||||
MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
|
||||
std::vector<SDOperand> IdxVec;
|
||||
IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
|
||||
IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
|
||||
SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
|
||||
Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
|
||||
Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
|
||||
DAG.getConstant(0, MVT::i32));
|
||||
}
|
||||
|
||||
return SDOperand();
|
||||
|
@ -31,8 +31,10 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
|
||||
oc == X86::FpMOV || oc == X86::MOVSSrr || oc == X86::MOVSDrr ||
|
||||
oc == X86::FsMOVAPSrr || oc == X86::FsMOVAPDrr ||
|
||||
oc == X86::MOVAPSrr || oc == X86::MOVAPDrr ||
|
||||
oc == X86::MOVSS128rr || oc == X86::MOVSD128rr ||
|
||||
oc == X86::MOVD128rr || oc == X86::MOVQ128rr) {
|
||||
oc == X86::MOVSS2PSrr || oc == X86::MOVSD2PDrr ||
|
||||
oc == X86::MOVPS2SSrr || oc == X86::MOVPD2SDrr ||
|
||||
oc == X86::MOVDI2PDIrr || oc == X86::MOVQI2PQIrr ||
|
||||
oc == X86::MOVPDI2DIrr) {
|
||||
assert(MI.getNumOperands() == 2 &&
|
||||
MI.getOperand(0).isRegister() &&
|
||||
MI.getOperand(1).isRegister() &&
|
||||
|
@ -284,6 +284,7 @@ def i16immZExt8 : PatLeaf<(i16 imm), [{
|
||||
def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
|
||||
def loadi16 : PatFrag<(ops node:$ptr), (i16 (load node:$ptr))>;
|
||||
def loadi32 : PatFrag<(ops node:$ptr), (i32 (load node:$ptr))>;
|
||||
def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
|
||||
|
||||
def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
|
||||
def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
|
||||
|
@ -226,24 +226,6 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
|
||||
"movsd {$src, $dst|$dst, $src}",
|
||||
[(store FR64:$src, addr:$dst)]>;
|
||||
|
||||
// FR32 / FR64 to 128-bit vector conversion.
|
||||
def MOVSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
|
||||
"movss {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (scalar_to_vector FR32:$src)))]>;
|
||||
def MOVSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
|
||||
"movss {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
|
||||
def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
|
||||
"movsd {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (scalar_to_vector FR64:$src)))]>;
|
||||
def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
|
||||
"movsd {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
|
||||
|
||||
// Arithmetic instructions
|
||||
let isTwoAddress = 1 in {
|
||||
let isCommutable = 1 in {
|
||||
@ -1122,18 +1104,6 @@ def HSUBPDrm : S3D_Intrm<0x7C, "hsubpd {$src2, $dst|$dst, $src2}",
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Move Instructions
|
||||
def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
|
||||
"movd {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (scalar_to_vector R32:$src)))]>;
|
||||
def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
|
||||
"movd {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
|
||||
|
||||
def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
|
||||
"movd {$src, $dst|$dst, $src}", []>;
|
||||
|
||||
def MOVDQArr : PDI<0x6F, MRMSrcReg, (ops VR128:$dst, VR128:$src),
|
||||
"movdqa {$src, $dst|$dst, $src}", []>;
|
||||
def MOVDQArm : PDI<0x6F, MRMSrcMem, (ops VR128:$dst, i128mem:$src),
|
||||
@ -1143,18 +1113,6 @@ def MOVDQAmr : PDI<0x7F, MRMDestMem, (ops i128mem:$dst, VR128:$src),
|
||||
"movdqa {$src, $dst|$dst, $src}",
|
||||
[(store (v4i32 VR128:$src), addr:$dst)]>;
|
||||
|
||||
// SSE2 instructions with XS prefix
|
||||
def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
|
||||
"movq {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (scalar_to_vector VR64:$src)))]>, XS,
|
||||
Requires<[HasSSE2]>;
|
||||
def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
|
||||
"movq {$src, $dst|$dst, $src}", []>, XS,
|
||||
Requires<[HasSSE2]>;
|
||||
def MOVQ128mr : PDI<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src),
|
||||
"movq {$src, $dst|$dst, $src}", []>;
|
||||
|
||||
// 128-bit Integer Arithmetic
|
||||
let isTwoAddress = 1 in {
|
||||
let isCommutable = 1 in {
|
||||
@ -1549,32 +1507,102 @@ def V_SETALLONES : PDI<0x76, MRMInitReg, (ops VR128:$dst),
|
||||
"pcmpeqd $dst, $dst",
|
||||
[(set VR128:$dst, (v2f64 immAllOnesV))]>;
|
||||
|
||||
// Scalar to 128-bit vector with zero extension.
|
||||
// FR32 / FR64 to 128-bit vector conversion.
|
||||
def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, FR32:$src),
|
||||
"movss {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (scalar_to_vector FR32:$src)))]>;
|
||||
def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
|
||||
"movss {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
|
||||
def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
|
||||
"movsd {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (scalar_to_vector FR64:$src)))]>;
|
||||
def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
|
||||
"movsd {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
|
||||
|
||||
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
|
||||
"movd {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (scalar_to_vector R32:$src)))]>;
|
||||
def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
|
||||
"movd {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
|
||||
// SSE2 instructions with XS prefix
|
||||
def MOVQI2PQIrr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
|
||||
"movq {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (scalar_to_vector VR64:$src)))]>, XS,
|
||||
Requires<[HasSSE2]>;
|
||||
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
|
||||
"movq {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
|
||||
Requires<[HasSSE2]>;
|
||||
// FIXME: may not be able to eliminate this movss with coalescing, since the src and
|
||||
// dest register classes are different. We really want to write this pattern
|
||||
// like this:
|
||||
// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (i32 0))),
|
||||
// (f32 FR32:$src)>;
|
||||
def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, VR128:$src),
|
||||
"movss {$src, $dst|$dst, $src}",
|
||||
[(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
|
||||
(i32 0)))]>;
|
||||
def MOVPS2SSmr : SSI<0x10, MRMDestMem, (ops f32mem:$dst, VR128:$src),
|
||||
"movss {$src, $dst|$dst, $src}",
|
||||
[(store (f32 (vector_extract (v4f32 VR128:$src),
|
||||
(i32 0))), addr:$dst)]>;
|
||||
def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, VR128:$src),
|
||||
"movsd {$src, $dst|$dst, $src}",
|
||||
[(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
|
||||
(i32 0)))]>;
|
||||
def MOVPD2SDmr : SDI<0x10, MRMDestMem, (ops f64mem:$dst, VR128:$src),
|
||||
"movsd {$src, $dst|$dst, $src}",
|
||||
[(store (f64 (vector_extract (v2f64 VR128:$src),
|
||||
(i32 0))), addr:$dst)]>;
|
||||
def MOVPDI2DIrr : PDI<0x7E, MRMSrcReg, (ops R32:$dst, VR128:$src),
|
||||
"movd {$src, $dst|$dst, $src}",
|
||||
[(set R32:$dst, (vector_extract (v4i32 VR128:$src),
|
||||
(i32 0)))]>;
|
||||
def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
|
||||
"movd {$src, $dst|$dst, $src}",
|
||||
[(store (i32 (vector_extract (v4i32 VR128:$src),
|
||||
(i32 0))), addr:$dst)]>;
|
||||
|
||||
// Move to lower bits of a VR128, leaving upper bits alone.
|
||||
// Three operand (but two address) aliases.
|
||||
let isTwoAddress = 1 in {
|
||||
def MOVZSS128rr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
|
||||
def MOVLSS2PSrr : SSI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR32:$src2),
|
||||
"movss {$src2, $dst|$dst, $src2}", []>;
|
||||
def MOVZSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
|
||||
def MOVLSD2PDrr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, VR128:$src1, FR64:$src2),
|
||||
"movsd {$src2, $dst|$dst, $src2}", []>;
|
||||
def MOVZD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
|
||||
def MOVLDI2PDIrr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, R32:$src2),
|
||||
"movd {$src2, $dst|$dst, $src2}", []>;
|
||||
def MOVZQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR64:$src2),
|
||||
"movq {$src2, $dst|$dst, $src2}", []>;
|
||||
}
|
||||
|
||||
// Move to the lower bits of a VR128, zeroing the upper bits.
|
||||
// Loading from memory automatically zeroes the upper bits.
|
||||
def MOVZSS128rm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
|
||||
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
|
||||
"movss {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v4f32 (X86zexts2vec (loadf32 addr:$src))))]>;
|
||||
def MOVZSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
|
||||
def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
|
||||
"movsd {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2f64 (X86zexts2vec (loadf64 addr:$src))))]>;
|
||||
def MOVZD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
|
||||
"movd {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
|
||||
def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
|
||||
"movd {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v4i32 (X86zexts2vec (loadi32 addr:$src))))]>;
|
||||
def MOVZQI2PQIrm : PDI<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
|
||||
"movd {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst,
|
||||
(v2i64 (X86zexts2vec (loadi64 addr:$src))))]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Non-Instruction Patterns
|
||||
@ -1621,9 +1649,9 @@ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
|
||||
|
||||
// Scalar to v8i16 / v16i8. The source may be a R32, but only the lower 8 or
|
||||
// 16-bits matter.
|
||||
def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
|
||||
def : Pat<(v8i16 (X86s2vec R32:$src)), (MOVDI2PDIrr R32:$src)>,
|
||||
Requires<[HasSSE2]>;
|
||||
def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVD128rr R32:$src)>,
|
||||
def : Pat<(v16i8 (X86s2vec R32:$src)), (MOVDI2PDIrr R32:$src)>,
|
||||
Requires<[HasSSE2]>;
|
||||
|
||||
// bit_convert
|
||||
@ -1659,17 +1687,15 @@ def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>,
|
||||
|
||||
// Zero a VR128, then do a MOVS* to the lower bits.
|
||||
def : Pat<(v2f64 (X86zexts2vec FR64:$src)),
|
||||
(MOVZSD128rr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
|
||||
(MOVLSD2PDrr (V_SET0_PD), FR64:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v4f32 (X86zexts2vec FR32:$src)),
|
||||
(MOVZSS128rr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v2i64 (X86zexts2vec VR64:$src)),
|
||||
(MOVZQ128rr (V_SET0_PI), VR64:$src)>, Requires<[HasSSE2]>;
|
||||
(MOVLSS2PSrr (V_SET0_PS), FR32:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v4i32 (X86zexts2vec R32:$src)),
|
||||
(MOVZD128rr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
|
||||
(MOVLDI2PDIrr (V_SET0_PI), R32:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v8i16 (X86zexts2vec R16:$src)),
|
||||
(MOVZD128rr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
|
||||
(MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr16 R16:$src))>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v16i8 (X86zexts2vec R8:$src)),
|
||||
(MOVZD128rr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
|
||||
(MOVLDI2PDIrr (V_SET0_PI), (MOVZX32rr8 R8:$src))>, Requires<[HasSSE2]>;
|
||||
|
||||
// Splat v2f64 / v2i64
|
||||
def : Pat<(vector_shuffle (v2f64 VR128:$src), (undef), SSE_splat_mask:$sm),
|
||||
|
Loading…
x
Reference in New Issue
Block a user