Merging r195716:

------------------------------------------------------------------------
r195716 | kevinqin | 2013-11-25 19:26:47 -0800 (Mon, 25 Nov 2013) | 3 lines

Refactored the implementation of the AArch64 NEON instructions ZIP, UZP
and TRN.
Fixed a bug triggered by mixing vget_high_u8() with vuzp_u8().
------------------------------------------------------------------------


git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195735 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bill Wendling 2013-11-26 10:57:43 +00:00
parent 9ed81d16f7
commit 8a0ff1f236
4 changed files with 234 additions and 322 deletions

View File

@ -921,6 +921,18 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
return "AArch64ISD::NEON_REV32";
case AArch64ISD::NEON_REV64:
return "AArch64ISD::NEON_REV64";
case AArch64ISD::NEON_UZP1:
return "AArch64ISD::NEON_UZP1";
case AArch64ISD::NEON_UZP2:
return "AArch64ISD::NEON_UZP2";
case AArch64ISD::NEON_ZIP1:
return "AArch64ISD::NEON_ZIP1";
case AArch64ISD::NEON_ZIP2:
return "AArch64ISD::NEON_ZIP2";
case AArch64ISD::NEON_TRN1:
return "AArch64ISD::NEON_TRN1";
case AArch64ISD::NEON_TRN2:
return "AArch64ISD::NEON_TRN2";
case AArch64ISD::NEON_LD1_UPD:
return "AArch64ISD::NEON_LD1_UPD";
case AArch64ISD::NEON_LD2_UPD:
@ -3826,6 +3838,59 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
// Check whether a BUILD_VECTOR can be represented as a VECTOR_SHUFFLE of at
// most two source vectors. If it can, build the shuffle, lower it with
// LowerVECTOR_SHUFFLE, store the lowered value in Res and return true.
// Otherwise return false and leave Res untouched.
//
// Every operand of Op must be an EXTRACT_VECTOR_ELT with a constant lane
// index. Two shapes are accepted:
//   * all lanes come from exactly two vectors that both have type VT, or
//   * all lanes come from one vector with the same element type as VT and
//     twice as many lanes; that vector is first split into its low and high
//     halves with EXTRACT_SUBVECTOR.
bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
                                                 SDValue &Res) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned V0NumElts = 0;
  int Mask[16];
  SDValue V0, V1;

  // Mask only has room for 16 lanes; never write past its end.
  if (NumElts > sizeof(Mask) / sizeof(Mask[0]))
    return false;

  // Check if all elements are extracted from less than 3 vectors.
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Elt = Op.getOperand(i);
    if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    // A shuffle mask needs lane numbers known at compile time. Give up on a
    // variable lane index instead of asserting inside cast<>.
    ConstantSDNode *LaneNode = dyn_cast<ConstantSDNode>(Elt.getOperand(1));
    if (!LaneNode)
      return false;
    unsigned Lane = LaneNode->getZExtValue();

    // The first source seen becomes V0, the second distinct one becomes V1.
    SDValue Src = Elt.getOperand(0);
    if (!V0.getNode()) {
      V0 = Src;
      V0NumElts = V0.getValueType().getVectorNumElements();
    }
    if (Src == V0) {
      Mask[i] = Lane;
      continue;
    }
    if (!V1.getNode())
      V1 = Src;
    if (Src == V1) {
      // Lanes of the second shuffle operand are numbered after those of V0.
      Mask[i] = Lane + V0NumElts;
      continue;
    }
    // A third distinct source vector cannot be expressed as one shuffle.
    return false;
  }

  // Single source with twice as many lanes: use its two halves as the shuffle
  // operands. The element types must agree, otherwise EXTRACT_SUBVECTOR to VT
  // would be ill-typed.
  if (!V1.getNode() && V0NumElts == NumElts * 2 &&
      V0.getValueType().getVectorElementType() == VT.getVectorElementType()) {
    V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
                     DAG.getConstant(NumElts, MVT::i64));
    V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
                     DAG.getConstant(0, MVT::i64));
  }

  // Both shuffle operands must have exactly the result type, not merely the
  // same number of lanes.
  if (V1.getNode() && V0.getValueType() == VT && V1.getValueType() == VT) {
    SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
    Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
    return true;
  }
  return false;
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
SDValue
@ -3964,7 +4029,7 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SmallVector<SDValue, 3> Ops;
Ops.push_back(N);
Ops.push_back(Op.getOperand(I));
Ops.push_back(DAG.getConstant(I, MVT::i32));
Ops.push_back(DAG.getConstant(I, MVT::i64));
N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, &Ops[0], 3);
}
}
@ -3980,6 +4045,11 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (isConstant)
return SDValue();
// Try to lower this BUILD_VECTOR as a vector shuffle.
SDValue Shuf;
if (isKnownShuffleVector(Op, DAG, Shuf))
return Shuf;
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
// worse. For a vector with one or two non-undef values, that's
@ -3992,7 +4062,7 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDValue V = Op.getOperand(i);
if (V.getOpcode() == ISD::UNDEF)
continue;
SDValue LaneIdx = DAG.getConstant(i, MVT::i32);
SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx);
}
return Vec;
@ -4030,6 +4100,83 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
return true;
}
// isPermuteMask - Check whether the vector shuffle matches to UZP, ZIP and
// TRN instruction.
static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts < 4)
return 0;
bool ismatch = true;
// Check UZP1
for (unsigned i = 0; i < NumElts; ++i) {
if ((unsigned)M[i] != i * 2) {
ismatch = false;
break;
}
}
if (ismatch)
return AArch64ISD::NEON_UZP1;
// Check UZP2
ismatch = true;
for (unsigned i = 0; i < NumElts; ++i) {
if ((unsigned)M[i] != i * 2 + 1) {
ismatch = false;
break;
}
}
if (ismatch)
return AArch64ISD::NEON_UZP2;
// Check ZIP1
ismatch = true;
for (unsigned i = 0; i < NumElts; ++i) {
if ((unsigned)M[i] != i / 2 + NumElts * (i % 2)) {
ismatch = false;
break;
}
}
if (ismatch)
return AArch64ISD::NEON_ZIP1;
// Check ZIP2
ismatch = true;
for (unsigned i = 0; i < NumElts; ++i) {
if ((unsigned)M[i] != (NumElts + i) / 2 + NumElts * (i % 2)) {
ismatch = false;
break;
}
}
if (ismatch)
return AArch64ISD::NEON_ZIP2;
// Check TRN1
ismatch = true;
for (unsigned i = 0; i < NumElts; ++i) {
if ((unsigned)M[i] != i + (NumElts - 1) * (i % 2)) {
ismatch = false;
break;
}
}
if (ismatch)
return AArch64ISD::NEON_TRN1;
// Check TRN2
ismatch = true;
for (unsigned i = 0; i < NumElts; ++i) {
if ((unsigned)M[i] != 1 + i + (NumElts - 1) * (i % 2)) {
ismatch = false;
break;
}
}
if (ismatch)
return AArch64ISD::NEON_TRN2;
return 0;
}
SDValue
AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
@ -4056,6 +4203,10 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (isREVMask(ShuffleMask, VT, 16))
return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1);
unsigned ISDNo = isPermuteMask(ShuffleMask, VT);
if (ISDNo)
return DAG.getNode(ISDNo, dl, VT, V1, V2);
// If the element of shuffle mask are all the same constant, we can
// transform it into either NEON_VDUP or NEON_VDUPLANE
if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
@ -4167,10 +4318,12 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
else
EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;
ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
DAG.getConstant(Mask, MVT::i64));
InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
DAG.getConstant(InsIndex[I], MVT::i64));
if (Mask >= 0) {
ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
DAG.getConstant(Mask, MVT::i64));
InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
DAG.getConstant(InsIndex[I], MVT::i64));
}
}
return InsV;
}

View File

@ -125,6 +125,14 @@ namespace AArch64ISD {
// Vector FP move immediate
NEON_FMOVIMM,
// Vector permute
NEON_UZP1,
NEON_UZP2,
NEON_ZIP1,
NEON_ZIP2,
NEON_TRN1,
NEON_TRN2,
// Vector Element reverse
NEON_REV64,
NEON_REV32,
@ -225,6 +233,8 @@ public:
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const AArch64Subtarget *ST) const;

View File

@ -46,6 +46,15 @@ def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;
def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
@ -2384,331 +2393,57 @@ defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
// The following are for instruction class (Perm)
class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
string asmop, RegisterOperand OpVPR, string OpS>
string asmop, RegisterOperand OpVPR, string OpS,
SDPatternOperator opnode, ValueType Ty>
: NeonI_Perm<q, size, opcode,
(outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
[], NoItinerary>;
[(set (Ty OpVPR:$Rd),
(Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
NoItinerary>;
multiclass NeonI_Perm_pat<bits<3> opcode, string asmop> {
def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop, VPR64, "8b">;
def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, VPR128, "16b">;
def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop, VPR64, "4h">;
def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop, VPR128, "8h">;
def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop, VPR64, "2s">;
def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop, VPR128, "4s">;
def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop, VPR128, "2d">;
}
multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
SDPatternOperator opnode> {
def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop,
VPR64, "8b", opnode, v8i8>;
def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
VPR128, "16b",opnode, v16i8>;
def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop,
VPR64, "4h", opnode, v4i16>;
def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop,
VPR128, "8h", opnode, v8i16>;
def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop,
VPR64, "2s", opnode, v2i32>;
def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop,
VPR128, "4s", opnode, v4i32>;
def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop,
VPR128, "2d", opnode, v2i64>;
}
defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1">;
defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1">;
defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1">;
defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2">;
defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2">;
defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2">;
defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;
// Extract and Insert
def NI_ei_i32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
(vector_insert node:$Rn,
(i32 (vector_extract node:$Rm, node:$Ext)),
node:$Ins)>;
multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
(!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;
def NI_ei_f32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
(vector_insert node:$Rn,
(f32 (vector_extract node:$Rm, node:$Ext)),
node:$Ins)>;
def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
(!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;
// uzp1
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 VPR128:$Rn),
(v16i8 VPR128:$Rn), 2, 1)),
(v16i8 VPR128:$Rn), 4, 2)),
(v16i8 VPR128:$Rn), 6, 3)),
(v16i8 VPR128:$Rn), 8, 4)),
(v16i8 VPR128:$Rn), 10, 5)),
(v16i8 VPR128:$Rn), 12, 6)),
(v16i8 VPR128:$Rn), 14, 7)),
(v16i8 VPR128:$Rm), 0, 8)),
(v16i8 VPR128:$Rm), 2, 9)),
(v16i8 VPR128:$Rm), 4, 10)),
(v16i8 VPR128:$Rm), 6, 11)),
(v16i8 VPR128:$Rm), 8, 12)),
(v16i8 VPR128:$Rm), 10, 13)),
(v16i8 VPR128:$Rm), 12, 14)),
(v16i8 VPR128:$Rm), 14, 15)),
(UZP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
(!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
}
class NI_Uzp1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty VPR:$Rn),
(Ty VPR:$Rn), 2, 1)),
(Ty VPR:$Rn), 4, 2)),
(Ty VPR:$Rn), 6, 3)),
(Ty VPR:$Rm), 0, 4)),
(Ty VPR:$Rm), 2, 5)),
(Ty VPR:$Rm), 4, 6)),
(Ty VPR:$Rm), 6, 7)),
(INST VPR:$Rn, VPR:$Rm)>;
def : NI_Uzp1_v8<v8i8, VPR64, UZP1vvv_8b>;
def : NI_Uzp1_v8<v8i16, VPR128, UZP1vvv_8h>;
class NI_Uzp1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
PatFrag ei>
: Pat<(Ty (ei (Ty (ei (Ty (ei
(Ty VPR:$Rn),
(Ty VPR:$Rn), 2, 1)),
(Ty VPR:$Rm), 0, 2)),
(Ty VPR:$Rm), 2, 3)),
(INST VPR:$Rn, VPR:$Rm)>;
def : NI_Uzp1_v4<v4i16, VPR64, UZP1vvv_4h, NI_ei_i32>;
def : NI_Uzp1_v4<v4i32, VPR128, UZP1vvv_4s, NI_ei_i32>;
def : NI_Uzp1_v4<v4f32, VPR128, UZP1vvv_4s, NI_ei_f32>;
// uzp2
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 VPR128:$Rm),
(v16i8 VPR128:$Rn), 1, 0)),
(v16i8 VPR128:$Rn), 3, 1)),
(v16i8 VPR128:$Rn), 5, 2)),
(v16i8 VPR128:$Rn), 7, 3)),
(v16i8 VPR128:$Rn), 9, 4)),
(v16i8 VPR128:$Rn), 11, 5)),
(v16i8 VPR128:$Rn), 13, 6)),
(v16i8 VPR128:$Rn), 15, 7)),
(v16i8 VPR128:$Rm), 1, 8)),
(v16i8 VPR128:$Rm), 3, 9)),
(v16i8 VPR128:$Rm), 5, 10)),
(v16i8 VPR128:$Rm), 7, 11)),
(v16i8 VPR128:$Rm), 9, 12)),
(v16i8 VPR128:$Rm), 11, 13)),
(v16i8 VPR128:$Rm), 13, 14)),
(UZP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
class NI_Uzp2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty VPR:$Rm),
(Ty VPR:$Rn), 1, 0)),
(Ty VPR:$Rn), 3, 1)),
(Ty VPR:$Rn), 5, 2)),
(Ty VPR:$Rn), 7, 3)),
(Ty VPR:$Rm), 1, 4)),
(Ty VPR:$Rm), 3, 5)),
(Ty VPR:$Rm), 5, 6)),
(INST VPR:$Rn, VPR:$Rm)>;
def : NI_Uzp2_v8<v8i8, VPR64, UZP2vvv_8b>;
def : NI_Uzp2_v8<v8i16, VPR128, UZP2vvv_8h>;
class NI_Uzp2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
PatFrag ei>
: Pat<(Ty (ei (Ty (ei (Ty (ei
(Ty VPR:$Rm),
(Ty VPR:$Rn), 1, 0)),
(Ty VPR:$Rn), 3, 1)),
(Ty VPR:$Rm), 1, 2)),
(INST VPR:$Rn, VPR:$Rm)>;
def : NI_Uzp2_v4<v4i16, VPR64, UZP2vvv_4h, NI_ei_i32>;
def : NI_Uzp2_v4<v4i32, VPR128, UZP2vvv_4s, NI_ei_i32>;
def : NI_Uzp2_v4<v4f32, VPR128, UZP2vvv_4s, NI_ei_f32>;
// zip1
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 VPR128:$Rn),
(v16i8 VPR128:$Rm), 0, 1)),
(v16i8 VPR128:$Rn), 1, 2)),
(v16i8 VPR128:$Rm), 1, 3)),
(v16i8 VPR128:$Rn), 2, 4)),
(v16i8 VPR128:$Rm), 2, 5)),
(v16i8 VPR128:$Rn), 3, 6)),
(v16i8 VPR128:$Rm), 3, 7)),
(v16i8 VPR128:$Rn), 4, 8)),
(v16i8 VPR128:$Rm), 4, 9)),
(v16i8 VPR128:$Rn), 5, 10)),
(v16i8 VPR128:$Rm), 5, 11)),
(v16i8 VPR128:$Rn), 6, 12)),
(v16i8 VPR128:$Rm), 6, 13)),
(v16i8 VPR128:$Rn), 7, 14)),
(v16i8 VPR128:$Rm), 7, 15)),
(ZIP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
class NI_Zip1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty VPR:$Rn),
(Ty VPR:$Rm), 0, 1)),
(Ty VPR:$Rn), 1, 2)),
(Ty VPR:$Rm), 1, 3)),
(Ty VPR:$Rn), 2, 4)),
(Ty VPR:$Rm), 2, 5)),
(Ty VPR:$Rn), 3, 6)),
(Ty VPR:$Rm), 3, 7)),
(INST VPR:$Rn, VPR:$Rm)>;
def : NI_Zip1_v8<v8i8, VPR64, ZIP1vvv_8b>;
def : NI_Zip1_v8<v8i16, VPR128, ZIP1vvv_8h>;
class NI_Zip1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
PatFrag ei>
: Pat<(Ty (ei (Ty (ei (Ty (ei
(Ty VPR:$Rn),
(Ty VPR:$Rm), 0, 1)),
(Ty VPR:$Rn), 1, 2)),
(Ty VPR:$Rm), 1, 3)),
(INST VPR:$Rn, VPR:$Rm)>;
def : NI_Zip1_v4<v4i16, VPR64, ZIP1vvv_4h, NI_ei_i32>;
def : NI_Zip1_v4<v4i32, VPR128, ZIP1vvv_4s, NI_ei_i32>;
def : NI_Zip1_v4<v4f32, VPR128, ZIP1vvv_4s, NI_ei_f32>;
// zip2
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 VPR128:$Rm),
(v16i8 VPR128:$Rn), 8, 0)),
(v16i8 VPR128:$Rm), 8, 1)),
(v16i8 VPR128:$Rn), 9, 2)),
(v16i8 VPR128:$Rm), 9, 3)),
(v16i8 VPR128:$Rn), 10, 4)),
(v16i8 VPR128:$Rm), 10, 5)),
(v16i8 VPR128:$Rn), 11, 6)),
(v16i8 VPR128:$Rm), 11, 7)),
(v16i8 VPR128:$Rn), 12, 8)),
(v16i8 VPR128:$Rm), 12, 9)),
(v16i8 VPR128:$Rn), 13, 10)),
(v16i8 VPR128:$Rm), 13, 11)),
(v16i8 VPR128:$Rn), 14, 12)),
(v16i8 VPR128:$Rm), 14, 13)),
(v16i8 VPR128:$Rn), 15, 14)),
(ZIP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
class NI_Zip2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty VPR:$Rm),
(Ty VPR:$Rn), 4, 0)),
(Ty VPR:$Rm), 4, 1)),
(Ty VPR:$Rn), 5, 2)),
(Ty VPR:$Rm), 5, 3)),
(Ty VPR:$Rn), 6, 4)),
(Ty VPR:$Rm), 6, 5)),
(Ty VPR:$Rn), 7, 6)),
(INST VPR:$Rn, VPR:$Rm)>;
def : NI_Zip2_v8<v8i8, VPR64, ZIP2vvv_8b>;
def : NI_Zip2_v8<v8i16, VPR128, ZIP2vvv_8h>;
class NI_Zip2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
PatFrag ei>
: Pat<(Ty (ei (Ty (ei (Ty (ei
(Ty VPR:$Rm),
(Ty VPR:$Rn), 2, 0)),
(Ty VPR:$Rm), 2, 1)),
(Ty VPR:$Rn), 3, 2)),
(INST VPR:$Rn, VPR:$Rm)>;
def : NI_Zip2_v4<v4i16, VPR64, ZIP2vvv_4h, NI_ei_i32>;
def : NI_Zip2_v4<v4i32, VPR128, ZIP2vvv_4s, NI_ei_i32>;
def : NI_Zip2_v4<v4f32, VPR128, ZIP2vvv_4s, NI_ei_f32>;
// trn1
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 VPR128:$Rn),
(v16i8 VPR128:$Rm), 0, 1)),
(v16i8 VPR128:$Rm), 2, 3)),
(v16i8 VPR128:$Rm), 4, 5)),
(v16i8 VPR128:$Rm), 6, 7)),
(v16i8 VPR128:$Rm), 8, 9)),
(v16i8 VPR128:$Rm), 10, 11)),
(v16i8 VPR128:$Rm), 12, 13)),
(v16i8 VPR128:$Rm), 14, 15)),
(TRN1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
class NI_Trn1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty VPR:$Rn),
(Ty VPR:$Rm), 0, 1)),
(Ty VPR:$Rm), 2, 3)),
(Ty VPR:$Rm), 4, 5)),
(Ty VPR:$Rm), 6, 7)),
(INST VPR:$Rn, VPR:$Rm)>;
def : NI_Trn1_v8<v8i8, VPR64, TRN1vvv_8b>;
def : NI_Trn1_v8<v8i16, VPR128, TRN1vvv_8h>;
class NI_Trn1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
PatFrag ei>
: Pat<(Ty (ei (Ty (ei
(Ty VPR:$Rn),
(Ty VPR:$Rm), 0, 1)),
(Ty VPR:$Rm), 2, 3)),
(INST VPR:$Rn, VPR:$Rm)>;
def : NI_Trn1_v4<v4i16, VPR64, TRN1vvv_4h, NI_ei_i32>;
def : NI_Trn1_v4<v4i32, VPR128, TRN1vvv_4s, NI_ei_i32>;
def : NI_Trn1_v4<v4f32, VPR128, TRN1vvv_4s, NI_ei_f32>;
// trn2
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
(v16i8 VPR128:$Rm),
(v16i8 VPR128:$Rn), 1, 0)),
(v16i8 VPR128:$Rn), 3, 2)),
(v16i8 VPR128:$Rn), 5, 4)),
(v16i8 VPR128:$Rn), 7, 6)),
(v16i8 VPR128:$Rn), 9, 8)),
(v16i8 VPR128:$Rn), 11, 10)),
(v16i8 VPR128:$Rn), 13, 12)),
(v16i8 VPR128:$Rn), 15, 14)),
(TRN2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
class NI_Trn2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
(Ty VPR:$Rm),
(Ty VPR:$Rn), 1, 0)),
(Ty VPR:$Rn), 3, 2)),
(Ty VPR:$Rn), 5, 4)),
(Ty VPR:$Rn), 7, 6)),
(INST VPR:$Rn, VPR:$Rm)>;
def : NI_Trn2_v8<v8i8, VPR64, TRN2vvv_8b>;
def : NI_Trn2_v8<v8i16, VPR128, TRN2vvv_8h>;
class NI_Trn2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
PatFrag ei>
: Pat<(Ty (ei (Ty (ei
(Ty VPR:$Rm),
(Ty VPR:$Rn), 1, 0)),
(Ty VPR:$Rn), 3, 2)),
(INST VPR:$Rn, VPR:$Rm)>;
def : NI_Trn2_v4<v4i16, VPR64, TRN2vvv_4h, NI_ei_i32>;
def : NI_Trn2_v4<v4i32, VPR128, TRN2vvv_4s, NI_ei_i32>;
def : NI_Trn2_v4<v4f32, VPR128, TRN2vvv_4s, NI_ei_f32>;
// End of implementation for instruction class (Perm)
defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;
// The following are for instruction class (3V Diff)

View File

@ -1674,3 +1674,17 @@ entry:
%.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1
ret %struct.poly16x8x2_t %.fca.0.1.insert
}
; Gathers the even-indexed and the odd-indexed bytes of a single <16 x i8>
; input into two <8 x i8> results and returns them as a pair.
; The expected code first copies the upper 64 bits of the input into a D
; register with dup, then emits uzp1 and uzp2 on .8b operands.
; Presumably this is the regression test for the mixed vget_high_u8() and
; vuzp_u8() case named in the commit message.
define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) {
; CHECK: test_uzp:
%vuzp.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%vuzp1.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
%.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
ret %struct.uint8x8x2_t %.fca.0.1.insert
; CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
; CHECK-NEXT: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
; CHECK-NEXT: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
}