mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-06 23:32:27 +00:00
Merge decoding of VPERMILPD and VPERMILPS shuffle masks. Merge X86ISD node type for VPERMILPD/PS. Add instruction selection support for VINSERTI128/VEXTRACTI128.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145483 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
553fe05f23
commit
316cd2a2c5
@ -309,32 +309,32 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VPERMILPSmi:
|
||||
DecodeVPERMILPSMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
DecodeVPERMILPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VPERMILPSYri:
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VPERMILPSYmi:
|
||||
DecodeVPERMILPSMask(8, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
DecodeVPERMILPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VPERMILPDri:
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VPERMILPDmi:
|
||||
DecodeVPERMILPDMask(2, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
DecodeVPERMILPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VPERMILPDYri:
|
||||
Src1Name = getRegName(MI->getOperand(1).getReg());
|
||||
// FALL THROUGH.
|
||||
case X86::VPERMILPDYmi:
|
||||
DecodeVPERMILPDMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
DecodeVPERMILPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
|
||||
ShuffleMask);
|
||||
DestName = getRegName(MI->getOperand(0).getReg());
|
||||
break;
|
||||
case X86::VPERM2F128rr:
|
||||
|
@ -193,36 +193,23 @@ void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
}
|
||||
}
|
||||
|
||||
// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
|
||||
// elements. For 256-bit vectors, it's considered as two 128-bit lanes; the
|
||||
// referenced elements can't cross lanes and the mask of the first lane must
|
||||
// be the same as that of the second.
|
||||
void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
unsigned NumLanes = (NumElts*32)/128;
|
||||
unsigned LaneSize = NumElts/NumLanes;
|
||||
// DecodeVPERMILPMask - Decodes VPERMILPS/ VPERMILPD permutes for any 128-bit
|
||||
// 32-bit or 64-bit elements. For 256-bit vectors, it's considered as two 128
|
||||
// lanes. For VPERMILPS, referenced elements can't cross lanes and the mask of
|
||||
// the first lane must be the same as that of the second.
|
||||
void DecodeVPERMILPMask(EVT VT, unsigned Imm,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
||||
unsigned NumLanes = VT.getSizeInBits() / 128;
|
||||
unsigned NumLaneElts = NumElts / NumLanes;
|
||||
|
||||
for (unsigned l = 0; l != NumLanes; ++l) {
|
||||
for (unsigned i = 0; i != LaneSize; ++i) {
|
||||
unsigned Idx = (Imm >> (i*2)) & 0x3 ;
|
||||
ShuffleMask.push_back(Idx+(l*LaneSize));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
|
||||
// elements. For 256-bit vectors, it's considered as two 128-bit lanes; the
|
||||
// referenced elements can't cross lanes but the mask of the first lane can
|
||||
// differ from that of the second (unlike VPERMILPS).
|
||||
void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask) {
|
||||
unsigned NumLanes = (NumElts*64)/128;
|
||||
unsigned LaneSize = NumElts/NumLanes;
|
||||
|
||||
for (unsigned l = 0; l < NumLanes; ++l) {
|
||||
for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) {
|
||||
unsigned Idx = (Imm >> i) & 0x1;
|
||||
ShuffleMask.push_back(Idx+(l*LaneSize));
|
||||
unsigned LaneStart = l*NumLaneElts;
|
||||
for (unsigned i = 0; i != NumLaneElts; ++i) {
|
||||
unsigned Idx = NumLaneElts == 4 ? (Imm >> (i*2)) & 0x3
|
||||
: (Imm >> (i+LaneStart)) & 0x1;
|
||||
ShuffleMask.push_back(Idx+LaneStart);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -78,18 +78,11 @@ void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
|
||||
// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
|
||||
// elements. For 256-bit vectors, it's considered as two 128-bit lanes; the
|
||||
// referenced elements can't cross lanes and the mask of the first lane must
|
||||
// be the same as that of the second.
|
||||
void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
|
||||
// elements. For 256-bit vectors, it's considered as two 128-bit lanes; the
|
||||
// referenced elements can't cross lanes but the mask of the first lane can
|
||||
// differ from that of the second (unlike VPERMILPS).
|
||||
void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
|
||||
// DecodeVPERMILPMask - Decodes VPERMILPS/ VPERMILPD permutes for any 128-bit
|
||||
// 32-bit or 64-bit elements. For 256-bit vectors, it's considered as two 128
|
||||
// lanes. For VPERMILPS, referenced elements can't cross lanes and the mask of
|
||||
// the first lane must be the same as that of the second.
|
||||
void DecodeVPERMILPMask(EVT VT, unsigned Imm,
|
||||
SmallVectorImpl<unsigned> &ShuffleMask);
|
||||
|
||||
void DecodeVPERM2F128Mask(unsigned Imm,
|
||||
|
@ -2847,8 +2847,7 @@ static bool isTargetShuffle(unsigned Opcode) {
|
||||
case X86ISD::PUNPCKL:
|
||||
case X86ISD::UNPCKHP:
|
||||
case X86ISD::PUNPCKH:
|
||||
case X86ISD::VPERMILPS:
|
||||
case X86ISD::VPERMILPD:
|
||||
case X86ISD::VPERMILP:
|
||||
case X86ISD::VPERM2F128:
|
||||
case X86ISD::VPERM2I128:
|
||||
return true;
|
||||
@ -2876,8 +2875,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
|
||||
case X86ISD::PSHUFD:
|
||||
case X86ISD::PSHUFHW:
|
||||
case X86ISD::PSHUFLW:
|
||||
case X86ISD::VPERMILPS:
|
||||
case X86ISD::VPERMILPD:
|
||||
case X86ISD::VPERMILP:
|
||||
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
|
||||
}
|
||||
|
||||
@ -4613,14 +4611,9 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
|
||||
return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
|
||||
Depth+1);
|
||||
}
|
||||
case X86ISD::VPERMILPS:
|
||||
case X86ISD::VPERMILP:
|
||||
ImmN = N->getOperand(N->getNumOperands()-1);
|
||||
DecodeVPERMILPSMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
||||
ShuffleMask);
|
||||
break;
|
||||
case X86ISD::VPERMILPD:
|
||||
ImmN = N->getOperand(N->getNumOperands()-1);
|
||||
DecodeVPERMILPDMask(NumElems, cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
||||
DecodeVPERMILPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
|
||||
ShuffleMask);
|
||||
break;
|
||||
case X86ISD::VPERM2F128:
|
||||
@ -6528,22 +6521,6 @@ static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline unsigned getVPERMILOpcode(EVT VT) {
|
||||
switch(VT.getSimpleVT().SimpleTy) {
|
||||
case MVT::v4i32:
|
||||
case MVT::v4f32:
|
||||
case MVT::v8i32:
|
||||
case MVT::v8f32: return X86ISD::VPERMILPS;
|
||||
case MVT::v2i64:
|
||||
case MVT::v2f64:
|
||||
case MVT::v4i64:
|
||||
case MVT::v4f64: return X86ISD::VPERMILPD;
|
||||
default:
|
||||
llvm_unreachable("Unknown type for vpermil");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline unsigned getVPERM2X128Opcode(EVT VT, bool HasAVX2) {
|
||||
switch(VT.getSimpleVT().SimpleTy) {
|
||||
case MVT::v32i8:
|
||||
@ -6876,7 +6853,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
// Handle VPERMILPS/D* permutations
|
||||
if (isVPERMILPMask(M, VT, Subtarget->hasAVX()))
|
||||
return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
|
||||
return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
|
||||
getShuffleVPERMILPImmediate(SVOp), DAG);
|
||||
|
||||
// Handle VPERM2F128/VPERM2I128 permutations
|
||||
@ -11179,8 +11156,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::PUNPCKL: return "X86ISD::PUNPCKL";
|
||||
case X86ISD::PUNPCKH: return "X86ISD::PUNPCKH";
|
||||
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
|
||||
case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS";
|
||||
case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
|
||||
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
|
||||
case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128";
|
||||
case X86ISD::VPERM2I128: return "X86ISD::VPERM2I128";
|
||||
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
|
||||
@ -14767,8 +14743,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case X86ISD::PSHUFLW:
|
||||
case X86ISD::MOVSS:
|
||||
case X86ISD::MOVSD:
|
||||
case X86ISD::VPERMILPS:
|
||||
case X86ISD::VPERMILPD:
|
||||
case X86ISD::VPERMILP:
|
||||
case X86ISD::VPERM2F128:
|
||||
case X86ISD::VPERM2I128:
|
||||
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
|
||||
|
@ -277,8 +277,7 @@ namespace llvm {
|
||||
UNPCKHP,
|
||||
PUNPCKL,
|
||||
PUNPCKH,
|
||||
VPERMILPS,
|
||||
VPERMILPD,
|
||||
VPERMILP,
|
||||
VPERM2F128,
|
||||
VPERM2I128,
|
||||
VBROADCAST,
|
||||
|
@ -136,8 +136,7 @@ def X86Unpckhp : SDNode<"X86ISD::UNPCKHP", SDTShuff2Op>;
|
||||
def X86Punpckl : SDNode<"X86ISD::PUNPCKL", SDTShuff2Op>;
|
||||
def X86Punpckh : SDNode<"X86ISD::PUNPCKH", SDTShuff2Op>;
|
||||
|
||||
def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>;
|
||||
def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
|
||||
def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
|
||||
|
||||
def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
|
||||
def X86VPerm2i128 : SDNode<"X86ISD::VPERM2I128", SDTShuff3OpI>;
|
||||
|
@ -7164,31 +7164,6 @@ def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
|
||||
def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
|
||||
|
||||
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VEXTRACTF128 - Extract packed floating-point values
|
||||
//
|
||||
@ -7211,31 +7186,6 @@ def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
|
||||
def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
|
||||
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
|
||||
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v4f32 (VEXTRACTF128rr
|
||||
(v8f32 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v2f64 (VEXTRACTF128rr
|
||||
(v4f64 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v4i32 (VEXTRACTF128rr
|
||||
(v8i32 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v2i64 (VEXTRACTF128rr
|
||||
(v4i64 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v8i16 (VEXTRACTF128rr
|
||||
(v16i16 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v16i8 (VEXTRACTF128rr
|
||||
(v32i8 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VMASKMOV - Conditional SIMD Packed Loads and Stores
|
||||
//
|
||||
@ -7322,22 +7272,22 @@ let ExeDomain = SSEPackedDouble in {
|
||||
int_x86_avx_vpermil_pd_256>;
|
||||
}
|
||||
|
||||
def : Pat<(v8f32 (X86VPermilps VR256:$src1, (i8 imm:$imm))),
|
||||
def : Pat<(v8f32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
|
||||
(VPERMILPSYri VR256:$src1, imm:$imm)>;
|
||||
def : Pat<(v4f64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))),
|
||||
def : Pat<(v4f64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
|
||||
(VPERMILPDYri VR256:$src1, imm:$imm)>;
|
||||
def : Pat<(v8i32 (X86VPermilps VR256:$src1, (i8 imm:$imm))),
|
||||
def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
|
||||
(VPERMILPSYri VR256:$src1, imm:$imm)>;
|
||||
def : Pat<(v4i64 (X86VPermilpd VR256:$src1, (i8 imm:$imm))),
|
||||
def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
|
||||
(VPERMILPDYri VR256:$src1, imm:$imm)>;
|
||||
def : Pat<(v8f32 (X86VPermilps (memopv8f32 addr:$src1), (i8 imm:$imm))),
|
||||
def : Pat<(v8f32 (X86VPermilp (memopv8f32 addr:$src1), (i8 imm:$imm))),
|
||||
(VPERMILPSYmi addr:$src1, imm:$imm)>;
|
||||
def : Pat<(v4f64 (X86VPermilpd (memopv4f64 addr:$src1), (i8 imm:$imm))),
|
||||
def : Pat<(v4f64 (X86VPermilp (memopv4f64 addr:$src1), (i8 imm:$imm))),
|
||||
(VPERMILPDYmi addr:$src1, imm:$imm)>;
|
||||
def : Pat<(v8i32 (X86VPermilps (bc_v8i32 (memopv4i64 addr:$src1)),
|
||||
def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
|
||||
(i8 imm:$imm))),
|
||||
(VPERMILPSYmi addr:$src1, imm:$imm)>;
|
||||
def : Pat<(v4i64 (X86VPermilpd (memopv4i64 addr:$src1), (i8 imm:$imm))),
|
||||
def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
|
||||
(VPERMILPDYmi addr:$src1, imm:$imm)>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -7656,6 +7606,51 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
|
||||
(int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2),
|
||||
imm:$src3))]>, VEX_4V;
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
}
|
||||
|
||||
// AVX1 patterns
|
||||
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VEXTRACTI128 - Extract packed integer values
|
||||
//
|
||||
@ -7670,6 +7665,51 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
|
||||
(ins i128mem:$dst, VR256:$src1, i8imm:$src2),
|
||||
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v2i64 (VEXTRACTI128rr
|
||||
(v4i64 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v4i32 (VEXTRACTI128rr
|
||||
(v8i32 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v8i16 (VEXTRACTI128rr
|
||||
(v16i16 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v16i8 (VEXTRACTI128rr
|
||||
(v32i8 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
}
|
||||
|
||||
// AVX1 patterns
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v4f32 (VEXTRACTF128rr
|
||||
(v8f32 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v2f64 (VEXTRACTF128rr
|
||||
(v4f64 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v2i64 (VEXTRACTF128rr
|
||||
(v4i64 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v4i32 (VEXTRACTF128rr
|
||||
(v8i32 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v8i16 (VEXTRACTF128rr
|
||||
(v16i16 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v16i8 (VEXTRACTF128rr
|
||||
(v32i8 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
|
||||
//
|
||||
|
Loading…
x
Reference in New Issue
Block a user