mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-03-04 05:31:51 +00:00
Merging r195716:
------------------------------------------------------------------------ r195716 | kevinqin | 2013-11-25 19:26:47 -0800 (Mon, 25 Nov 2013) | 3 lines Refactored the implementation of the AArch64 NEON instructions ZIP, UZP and TRN. Fixed a bug triggered by mixed use of vget_high_u8() and vuzp_u8(). ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@195735 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9ed81d16f7
commit
8a0ff1f236
@ -921,6 +921,18 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
return "AArch64ISD::NEON_REV32";
|
||||
case AArch64ISD::NEON_REV64:
|
||||
return "AArch64ISD::NEON_REV64";
|
||||
case AArch64ISD::NEON_UZP1:
|
||||
return "AArch64ISD::NEON_UZP1";
|
||||
case AArch64ISD::NEON_UZP2:
|
||||
return "AArch64ISD::NEON_UZP2";
|
||||
case AArch64ISD::NEON_ZIP1:
|
||||
return "AArch64ISD::NEON_ZIP1";
|
||||
case AArch64ISD::NEON_ZIP2:
|
||||
return "AArch64ISD::NEON_ZIP2";
|
||||
case AArch64ISD::NEON_TRN1:
|
||||
return "AArch64ISD::NEON_TRN1";
|
||||
case AArch64ISD::NEON_TRN2:
|
||||
return "AArch64ISD::NEON_TRN2";
|
||||
case AArch64ISD::NEON_LD1_UPD:
|
||||
return "AArch64ISD::NEON_LD1_UPD";
|
||||
case AArch64ISD::NEON_LD2_UPD:
|
||||
@ -3826,6 +3838,59 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check whether a Build Vector could be presented as Shuffle Vector. If yes,
|
||||
// try to call LowerVECTOR_SHUFFLE to lower it.
|
||||
bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
|
||||
SDValue &Res) const {
|
||||
SDLoc DL(Op);
|
||||
EVT VT = Op.getValueType();
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
unsigned V0NumElts = 0;
|
||||
int Mask[16];
|
||||
SDValue V0, V1;
|
||||
|
||||
// Check if all elements are extracted from less than 3 vectors.
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
SDValue Elt = Op.getOperand(i);
|
||||
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
|
||||
return false;
|
||||
|
||||
if (V0.getNode() == 0) {
|
||||
V0 = Elt.getOperand(0);
|
||||
V0NumElts = V0.getValueType().getVectorNumElements();
|
||||
}
|
||||
if (Elt.getOperand(0) == V0) {
|
||||
Mask[i] = (cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue());
|
||||
continue;
|
||||
} else if (V1.getNode() == 0) {
|
||||
V1 = Elt.getOperand(0);
|
||||
}
|
||||
if (Elt.getOperand(0) == V1) {
|
||||
unsigned Lane = cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue();
|
||||
Mask[i] = (Lane + V0NumElts);
|
||||
continue;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!V1.getNode() && V0NumElts == NumElts * 2) {
|
||||
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
|
||||
DAG.getConstant(NumElts, MVT::i64));
|
||||
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
|
||||
DAG.getConstant(0, MVT::i64));
|
||||
V0NumElts = V0.getValueType().getVectorNumElements();
|
||||
}
|
||||
|
||||
if (V1.getNode() && NumElts == V0NumElts &&
|
||||
V0NumElts == V1.getValueType().getVectorNumElements()) {
|
||||
SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
|
||||
Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
|
||||
// If this is a case we can't handle, return null and let the default
|
||||
// expansion code take care of it.
|
||||
SDValue
|
||||
@ -3964,7 +4029,7 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
||||
SmallVector<SDValue, 3> Ops;
|
||||
Ops.push_back(N);
|
||||
Ops.push_back(Op.getOperand(I));
|
||||
Ops.push_back(DAG.getConstant(I, MVT::i32));
|
||||
Ops.push_back(DAG.getConstant(I, MVT::i64));
|
||||
N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, &Ops[0], 3);
|
||||
}
|
||||
}
|
||||
@ -3980,6 +4045,11 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
||||
if (isConstant)
|
||||
return SDValue();
|
||||
|
||||
// Try to lower this the same way a ShuffleVector is lowered.
|
||||
SDValue Shuf;
|
||||
if (isKnownShuffleVector(Op, DAG, Shuf))
|
||||
return Shuf;
|
||||
|
||||
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
|
||||
// know the default expansion would otherwise fall back on something even
|
||||
// worse. For a vector with one or two non-undef values, that's
|
||||
@ -3992,7 +4062,7 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
||||
SDValue V = Op.getOperand(i);
|
||||
if (V.getOpcode() == ISD::UNDEF)
|
||||
continue;
|
||||
SDValue LaneIdx = DAG.getConstant(i, MVT::i32);
|
||||
SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
|
||||
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx);
|
||||
}
|
||||
return Vec;
|
||||
@ -4030,6 +4100,83 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// isPermuteMask - Check whether the vector shuffle matches to UZP, ZIP and
|
||||
// TRN instruction.
|
||||
static unsigned isPermuteMask(ArrayRef<int> M, EVT VT) {
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
if (NumElts < 4)
|
||||
return 0;
|
||||
|
||||
bool ismatch = true;
|
||||
|
||||
// Check UZP1
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
if ((unsigned)M[i] != i * 2) {
|
||||
ismatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ismatch)
|
||||
return AArch64ISD::NEON_UZP1;
|
||||
|
||||
// Check UZP2
|
||||
ismatch = true;
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
if ((unsigned)M[i] != i * 2 + 1) {
|
||||
ismatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ismatch)
|
||||
return AArch64ISD::NEON_UZP2;
|
||||
|
||||
// Check ZIP1
|
||||
ismatch = true;
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
if ((unsigned)M[i] != i / 2 + NumElts * (i % 2)) {
|
||||
ismatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ismatch)
|
||||
return AArch64ISD::NEON_ZIP1;
|
||||
|
||||
// Check ZIP2
|
||||
ismatch = true;
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
if ((unsigned)M[i] != (NumElts + i) / 2 + NumElts * (i % 2)) {
|
||||
ismatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ismatch)
|
||||
return AArch64ISD::NEON_ZIP2;
|
||||
|
||||
// Check TRN1
|
||||
ismatch = true;
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
if ((unsigned)M[i] != i + (NumElts - 1) * (i % 2)) {
|
||||
ismatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ismatch)
|
||||
return AArch64ISD::NEON_TRN1;
|
||||
|
||||
// Check TRN2
|
||||
ismatch = true;
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
if ((unsigned)M[i] != 1 + i + (NumElts - 1) * (i % 2)) {
|
||||
ismatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ismatch)
|
||||
return AArch64ISD::NEON_TRN2;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SDValue
|
||||
AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
@ -4056,6 +4203,10 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
||||
if (isREVMask(ShuffleMask, VT, 16))
|
||||
return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1);
|
||||
|
||||
unsigned ISDNo = isPermuteMask(ShuffleMask, VT);
|
||||
if (ISDNo)
|
||||
return DAG.getNode(ISDNo, dl, VT, V1, V2);
|
||||
|
||||
// If the elements of the shuffle mask are all the same constant, we can
|
||||
// transform it into either NEON_VDUP or NEON_VDUPLANE
|
||||
if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
|
||||
@ -4167,10 +4318,12 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
||||
else
|
||||
EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;
|
||||
|
||||
ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
|
||||
DAG.getConstant(Mask, MVT::i64));
|
||||
InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
|
||||
DAG.getConstant(InsIndex[I], MVT::i64));
|
||||
if (Mask >= 0) {
|
||||
ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
|
||||
DAG.getConstant(Mask, MVT::i64));
|
||||
InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
|
||||
DAG.getConstant(InsIndex[I], MVT::i64));
|
||||
}
|
||||
}
|
||||
return InsV;
|
||||
}
|
||||
|
@ -125,6 +125,14 @@ namespace AArch64ISD {
|
||||
// Vector FP move immediate
|
||||
NEON_FMOVIMM,
|
||||
|
||||
// Vector permute
|
||||
NEON_UZP1,
|
||||
NEON_UZP2,
|
||||
NEON_ZIP1,
|
||||
NEON_ZIP2,
|
||||
NEON_TRN1,
|
||||
NEON_TRN2,
|
||||
|
||||
// Vector Element reverse
|
||||
NEON_REV64,
|
||||
NEON_REV32,
|
||||
@ -225,6 +233,8 @@ public:
|
||||
SDLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const;
|
||||
|
||||
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
||||
const AArch64Subtarget *ST) const;
|
||||
|
||||
|
@ -46,6 +46,15 @@ def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
|
||||
def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
|
||||
def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
|
||||
|
||||
// Type profile shared by the permute nodes below: one result and two
// operands, where the result is a vector and both operands have the same type
// as the result.
def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
|
||||
SDTCisSameAs<0, 2>]>;
|
||||
// Selection DAG nodes for the AArch64ISD::NEON_UZP1/UZP2, NEON_ZIP1/ZIP2 and
// NEON_TRN1/TRN2 opcodes, all using the SDTPERMUTE profile.
def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
|
||||
def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
|
||||
def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
|
||||
def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
|
||||
def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
|
||||
def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;
|
||||
|
||||
def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
|
||||
def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
|
||||
def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
|
||||
@ -2384,331 +2393,57 @@ defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
|
||||
// The following are for instruction class (Perm)
|
||||
|
||||
class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
|
||||
string asmop, RegisterOperand OpVPR, string OpS>
|
||||
string asmop, RegisterOperand OpVPR, string OpS,
|
||||
SDPatternOperator opnode, ValueType Ty>
|
||||
: NeonI_Perm<q, size, opcode,
|
||||
(outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
|
||||
asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
|
||||
[], NoItinerary>;
|
||||
[(set (Ty OpVPR:$Rd),
|
||||
(Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
|
||||
NoItinerary>;
|
||||
|
||||
multiclass NeonI_Perm_pat<bits<3> opcode, string asmop> {
|
||||
def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop, VPR64, "8b">;
|
||||
def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, VPR128, "16b">;
|
||||
def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop, VPR64, "4h">;
|
||||
def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop, VPR128, "8h">;
|
||||
def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop, VPR64, "2s">;
|
||||
def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop, VPR128, "4s">;
|
||||
def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop, VPR128, "2d">;
|
||||
}
|
||||
multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
|
||||
SDPatternOperator opnode> {
|
||||
def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop,
|
||||
VPR64, "8b", opnode, v8i8>;
|
||||
def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
|
||||
VPR128, "16b",opnode, v16i8>;
|
||||
def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop,
|
||||
VPR64, "4h", opnode, v4i16>;
|
||||
def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop,
|
||||
VPR128, "8h", opnode, v8i16>;
|
||||
def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop,
|
||||
VPR64, "2s", opnode, v2i32>;
|
||||
def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop,
|
||||
VPR128, "4s", opnode, v4i32>;
|
||||
def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop,
|
||||
VPR128, "2d", opnode, v2i64>;
|
||||
}
|
||||
|
||||
defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1">;
|
||||
defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1">;
|
||||
defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1">;
|
||||
defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2">;
|
||||
defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2">;
|
||||
defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2">;
|
||||
defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
|
||||
defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
|
||||
defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
|
||||
defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
|
||||
defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
|
||||
defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;
|
||||
|
||||
// Extract and Insert
|
||||
def NI_ei_i32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
|
||||
(vector_insert node:$Rn,
|
||||
(i32 (vector_extract node:$Rm, node:$Ext)),
|
||||
node:$Ins)>;
|
||||
multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
|
||||
def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
|
||||
(!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;
|
||||
|
||||
def NI_ei_f32 : PatFrag<(ops node:$Rn, node:$Rm, node:$Ext, node:$Ins),
|
||||
(vector_insert node:$Rn,
|
||||
(f32 (vector_extract node:$Rm, node:$Ext)),
|
||||
node:$Ins)>;
|
||||
def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
|
||||
(!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;
|
||||
|
||||
// uzp1
|
||||
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 VPR128:$Rn),
|
||||
(v16i8 VPR128:$Rn), 2, 1)),
|
||||
(v16i8 VPR128:$Rn), 4, 2)),
|
||||
(v16i8 VPR128:$Rn), 6, 3)),
|
||||
(v16i8 VPR128:$Rn), 8, 4)),
|
||||
(v16i8 VPR128:$Rn), 10, 5)),
|
||||
(v16i8 VPR128:$Rn), 12, 6)),
|
||||
(v16i8 VPR128:$Rn), 14, 7)),
|
||||
(v16i8 VPR128:$Rm), 0, 8)),
|
||||
(v16i8 VPR128:$Rm), 2, 9)),
|
||||
(v16i8 VPR128:$Rm), 4, 10)),
|
||||
(v16i8 VPR128:$Rm), 6, 11)),
|
||||
(v16i8 VPR128:$Rm), 8, 12)),
|
||||
(v16i8 VPR128:$Rm), 10, 13)),
|
||||
(v16i8 VPR128:$Rm), 12, 14)),
|
||||
(v16i8 VPR128:$Rm), 14, 15)),
|
||||
(UZP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
|
||||
def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
|
||||
(!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
|
||||
}
|
||||
|
||||
class NI_Uzp1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
|
||||
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
|
||||
(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
|
||||
(Ty VPR:$Rn),
|
||||
(Ty VPR:$Rn), 2, 1)),
|
||||
(Ty VPR:$Rn), 4, 2)),
|
||||
(Ty VPR:$Rn), 6, 3)),
|
||||
(Ty VPR:$Rm), 0, 4)),
|
||||
(Ty VPR:$Rm), 2, 5)),
|
||||
(Ty VPR:$Rm), 4, 6)),
|
||||
(Ty VPR:$Rm), 6, 7)),
|
||||
(INST VPR:$Rn, VPR:$Rm)>;
|
||||
|
||||
def : NI_Uzp1_v8<v8i8, VPR64, UZP1vvv_8b>;
|
||||
def : NI_Uzp1_v8<v8i16, VPR128, UZP1vvv_8h>;
|
||||
|
||||
class NI_Uzp1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
|
||||
PatFrag ei>
|
||||
: Pat<(Ty (ei (Ty (ei (Ty (ei
|
||||
(Ty VPR:$Rn),
|
||||
(Ty VPR:$Rn), 2, 1)),
|
||||
(Ty VPR:$Rm), 0, 2)),
|
||||
(Ty VPR:$Rm), 2, 3)),
|
||||
(INST VPR:$Rn, VPR:$Rm)>;
|
||||
|
||||
def : NI_Uzp1_v4<v4i16, VPR64, UZP1vvv_4h, NI_ei_i32>;
|
||||
def : NI_Uzp1_v4<v4i32, VPR128, UZP1vvv_4s, NI_ei_i32>;
|
||||
def : NI_Uzp1_v4<v4f32, VPR128, UZP1vvv_4s, NI_ei_f32>;
|
||||
|
||||
// uzp2
|
||||
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 VPR128:$Rm),
|
||||
(v16i8 VPR128:$Rn), 1, 0)),
|
||||
(v16i8 VPR128:$Rn), 3, 1)),
|
||||
(v16i8 VPR128:$Rn), 5, 2)),
|
||||
(v16i8 VPR128:$Rn), 7, 3)),
|
||||
(v16i8 VPR128:$Rn), 9, 4)),
|
||||
(v16i8 VPR128:$Rn), 11, 5)),
|
||||
(v16i8 VPR128:$Rn), 13, 6)),
|
||||
(v16i8 VPR128:$Rn), 15, 7)),
|
||||
(v16i8 VPR128:$Rm), 1, 8)),
|
||||
(v16i8 VPR128:$Rm), 3, 9)),
|
||||
(v16i8 VPR128:$Rm), 5, 10)),
|
||||
(v16i8 VPR128:$Rm), 7, 11)),
|
||||
(v16i8 VPR128:$Rm), 9, 12)),
|
||||
(v16i8 VPR128:$Rm), 11, 13)),
|
||||
(v16i8 VPR128:$Rm), 13, 14)),
|
||||
(UZP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
|
||||
|
||||
class NI_Uzp2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
|
||||
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
|
||||
(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
|
||||
(Ty VPR:$Rm),
|
||||
(Ty VPR:$Rn), 1, 0)),
|
||||
(Ty VPR:$Rn), 3, 1)),
|
||||
(Ty VPR:$Rn), 5, 2)),
|
||||
(Ty VPR:$Rn), 7, 3)),
|
||||
(Ty VPR:$Rm), 1, 4)),
|
||||
(Ty VPR:$Rm), 3, 5)),
|
||||
(Ty VPR:$Rm), 5, 6)),
|
||||
(INST VPR:$Rn, VPR:$Rm)>;
|
||||
|
||||
def : NI_Uzp2_v8<v8i8, VPR64, UZP2vvv_8b>;
|
||||
def : NI_Uzp2_v8<v8i16, VPR128, UZP2vvv_8h>;
|
||||
|
||||
class NI_Uzp2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
|
||||
PatFrag ei>
|
||||
: Pat<(Ty (ei (Ty (ei (Ty (ei
|
||||
(Ty VPR:$Rm),
|
||||
(Ty VPR:$Rn), 1, 0)),
|
||||
(Ty VPR:$Rn), 3, 1)),
|
||||
(Ty VPR:$Rm), 1, 2)),
|
||||
(INST VPR:$Rn, VPR:$Rm)>;
|
||||
|
||||
def : NI_Uzp2_v4<v4i16, VPR64, UZP2vvv_4h, NI_ei_i32>;
|
||||
def : NI_Uzp2_v4<v4i32, VPR128, UZP2vvv_4s, NI_ei_i32>;
|
||||
def : NI_Uzp2_v4<v4f32, VPR128, UZP2vvv_4s, NI_ei_f32>;
|
||||
|
||||
// zip1
|
||||
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 VPR128:$Rn),
|
||||
(v16i8 VPR128:$Rm), 0, 1)),
|
||||
(v16i8 VPR128:$Rn), 1, 2)),
|
||||
(v16i8 VPR128:$Rm), 1, 3)),
|
||||
(v16i8 VPR128:$Rn), 2, 4)),
|
||||
(v16i8 VPR128:$Rm), 2, 5)),
|
||||
(v16i8 VPR128:$Rn), 3, 6)),
|
||||
(v16i8 VPR128:$Rm), 3, 7)),
|
||||
(v16i8 VPR128:$Rn), 4, 8)),
|
||||
(v16i8 VPR128:$Rm), 4, 9)),
|
||||
(v16i8 VPR128:$Rn), 5, 10)),
|
||||
(v16i8 VPR128:$Rm), 5, 11)),
|
||||
(v16i8 VPR128:$Rn), 6, 12)),
|
||||
(v16i8 VPR128:$Rm), 6, 13)),
|
||||
(v16i8 VPR128:$Rn), 7, 14)),
|
||||
(v16i8 VPR128:$Rm), 7, 15)),
|
||||
(ZIP1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
|
||||
|
||||
class NI_Zip1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
|
||||
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
|
||||
(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
|
||||
(Ty VPR:$Rn),
|
||||
(Ty VPR:$Rm), 0, 1)),
|
||||
(Ty VPR:$Rn), 1, 2)),
|
||||
(Ty VPR:$Rm), 1, 3)),
|
||||
(Ty VPR:$Rn), 2, 4)),
|
||||
(Ty VPR:$Rm), 2, 5)),
|
||||
(Ty VPR:$Rn), 3, 6)),
|
||||
(Ty VPR:$Rm), 3, 7)),
|
||||
(INST VPR:$Rn, VPR:$Rm)>;
|
||||
|
||||
def : NI_Zip1_v8<v8i8, VPR64, ZIP1vvv_8b>;
|
||||
def : NI_Zip1_v8<v8i16, VPR128, ZIP1vvv_8h>;
|
||||
|
||||
class NI_Zip1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
|
||||
PatFrag ei>
|
||||
: Pat<(Ty (ei (Ty (ei (Ty (ei
|
||||
(Ty VPR:$Rn),
|
||||
(Ty VPR:$Rm), 0, 1)),
|
||||
(Ty VPR:$Rn), 1, 2)),
|
||||
(Ty VPR:$Rm), 1, 3)),
|
||||
(INST VPR:$Rn, VPR:$Rm)>;
|
||||
|
||||
def : NI_Zip1_v4<v4i16, VPR64, ZIP1vvv_4h, NI_ei_i32>;
|
||||
def : NI_Zip1_v4<v4i32, VPR128, ZIP1vvv_4s, NI_ei_i32>;
|
||||
def : NI_Zip1_v4<v4f32, VPR128, ZIP1vvv_4s, NI_ei_f32>;
|
||||
|
||||
// zip2
|
||||
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 VPR128:$Rm),
|
||||
(v16i8 VPR128:$Rn), 8, 0)),
|
||||
(v16i8 VPR128:$Rm), 8, 1)),
|
||||
(v16i8 VPR128:$Rn), 9, 2)),
|
||||
(v16i8 VPR128:$Rm), 9, 3)),
|
||||
(v16i8 VPR128:$Rn), 10, 4)),
|
||||
(v16i8 VPR128:$Rm), 10, 5)),
|
||||
(v16i8 VPR128:$Rn), 11, 6)),
|
||||
(v16i8 VPR128:$Rm), 11, 7)),
|
||||
(v16i8 VPR128:$Rn), 12, 8)),
|
||||
(v16i8 VPR128:$Rm), 12, 9)),
|
||||
(v16i8 VPR128:$Rn), 13, 10)),
|
||||
(v16i8 VPR128:$Rm), 13, 11)),
|
||||
(v16i8 VPR128:$Rn), 14, 12)),
|
||||
(v16i8 VPR128:$Rm), 14, 13)),
|
||||
(v16i8 VPR128:$Rn), 15, 14)),
|
||||
(ZIP2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
|
||||
|
||||
class NI_Zip2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
|
||||
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
|
||||
(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
|
||||
(Ty VPR:$Rm),
|
||||
(Ty VPR:$Rn), 4, 0)),
|
||||
(Ty VPR:$Rm), 4, 1)),
|
||||
(Ty VPR:$Rn), 5, 2)),
|
||||
(Ty VPR:$Rm), 5, 3)),
|
||||
(Ty VPR:$Rn), 6, 4)),
|
||||
(Ty VPR:$Rm), 6, 5)),
|
||||
(Ty VPR:$Rn), 7, 6)),
|
||||
(INST VPR:$Rn, VPR:$Rm)>;
|
||||
|
||||
def : NI_Zip2_v8<v8i8, VPR64, ZIP2vvv_8b>;
|
||||
def : NI_Zip2_v8<v8i16, VPR128, ZIP2vvv_8h>;
|
||||
|
||||
class NI_Zip2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
|
||||
PatFrag ei>
|
||||
: Pat<(Ty (ei (Ty (ei (Ty (ei
|
||||
(Ty VPR:$Rm),
|
||||
(Ty VPR:$Rn), 2, 0)),
|
||||
(Ty VPR:$Rm), 2, 1)),
|
||||
(Ty VPR:$Rn), 3, 2)),
|
||||
(INST VPR:$Rn, VPR:$Rm)>;
|
||||
|
||||
def : NI_Zip2_v4<v4i16, VPR64, ZIP2vvv_4h, NI_ei_i32>;
|
||||
def : NI_Zip2_v4<v4i32, VPR128, ZIP2vvv_4s, NI_ei_i32>;
|
||||
def : NI_Zip2_v4<v4f32, VPR128, ZIP2vvv_4s, NI_ei_f32>;
|
||||
|
||||
// trn1
|
||||
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 VPR128:$Rn),
|
||||
(v16i8 VPR128:$Rm), 0, 1)),
|
||||
(v16i8 VPR128:$Rm), 2, 3)),
|
||||
(v16i8 VPR128:$Rm), 4, 5)),
|
||||
(v16i8 VPR128:$Rm), 6, 7)),
|
||||
(v16i8 VPR128:$Rm), 8, 9)),
|
||||
(v16i8 VPR128:$Rm), 10, 11)),
|
||||
(v16i8 VPR128:$Rm), 12, 13)),
|
||||
(v16i8 VPR128:$Rm), 14, 15)),
|
||||
(TRN1vvv_16b VPR128:$Rn, VPR128:$Rm)>;
|
||||
|
||||
class NI_Trn1_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
|
||||
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
|
||||
(Ty VPR:$Rn),
|
||||
(Ty VPR:$Rm), 0, 1)),
|
||||
(Ty VPR:$Rm), 2, 3)),
|
||||
(Ty VPR:$Rm), 4, 5)),
|
||||
(Ty VPR:$Rm), 6, 7)),
|
||||
(INST VPR:$Rn, VPR:$Rm)>;
|
||||
|
||||
def : NI_Trn1_v8<v8i8, VPR64, TRN1vvv_8b>;
|
||||
def : NI_Trn1_v8<v8i16, VPR128, TRN1vvv_8h>;
|
||||
|
||||
class NI_Trn1_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
|
||||
PatFrag ei>
|
||||
: Pat<(Ty (ei (Ty (ei
|
||||
(Ty VPR:$Rn),
|
||||
(Ty VPR:$Rm), 0, 1)),
|
||||
(Ty VPR:$Rm), 2, 3)),
|
||||
(INST VPR:$Rn, VPR:$Rm)>;
|
||||
|
||||
def : NI_Trn1_v4<v4i16, VPR64, TRN1vvv_4h, NI_ei_i32>;
|
||||
def : NI_Trn1_v4<v4i32, VPR128, TRN1vvv_4s, NI_ei_i32>;
|
||||
def : NI_Trn1_v4<v4f32, VPR128, TRN1vvv_4s, NI_ei_f32>;
|
||||
|
||||
// trn2
|
||||
def : Pat<(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 (NI_ei_i32 (v16i8 (NI_ei_i32
|
||||
(v16i8 VPR128:$Rm),
|
||||
(v16i8 VPR128:$Rn), 1, 0)),
|
||||
(v16i8 VPR128:$Rn), 3, 2)),
|
||||
(v16i8 VPR128:$Rn), 5, 4)),
|
||||
(v16i8 VPR128:$Rn), 7, 6)),
|
||||
(v16i8 VPR128:$Rn), 9, 8)),
|
||||
(v16i8 VPR128:$Rn), 11, 10)),
|
||||
(v16i8 VPR128:$Rn), 13, 12)),
|
||||
(v16i8 VPR128:$Rn), 15, 14)),
|
||||
(TRN2vvv_16b VPR128:$Rn, VPR128:$Rm)>;
|
||||
|
||||
class NI_Trn2_v8<ValueType Ty, RegisterOperand VPR, Instruction INST>
|
||||
: Pat<(Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32 (Ty (NI_ei_i32
|
||||
(Ty VPR:$Rm),
|
||||
(Ty VPR:$Rn), 1, 0)),
|
||||
(Ty VPR:$Rn), 3, 2)),
|
||||
(Ty VPR:$Rn), 5, 4)),
|
||||
(Ty VPR:$Rn), 7, 6)),
|
||||
(INST VPR:$Rn, VPR:$Rm)>;
|
||||
|
||||
def : NI_Trn2_v8<v8i8, VPR64, TRN2vvv_8b>;
|
||||
def : NI_Trn2_v8<v8i16, VPR128, TRN2vvv_8h>;
|
||||
|
||||
class NI_Trn2_v4<ValueType Ty, RegisterOperand VPR, Instruction INST,
|
||||
PatFrag ei>
|
||||
: Pat<(Ty (ei (Ty (ei
|
||||
(Ty VPR:$Rm),
|
||||
(Ty VPR:$Rn), 1, 0)),
|
||||
(Ty VPR:$Rn), 3, 2)),
|
||||
(INST VPR:$Rn, VPR:$Rm)>;
|
||||
|
||||
def : NI_Trn2_v4<v4i16, VPR64, TRN2vvv_4h, NI_ei_i32>;
|
||||
def : NI_Trn2_v4<v4i32, VPR128, TRN2vvv_4s, NI_ei_i32>;
|
||||
def : NI_Trn2_v4<v4f32, VPR128, TRN2vvv_4s, NI_ei_f32>;
|
||||
|
||||
// End of implementation for instruction class (Perm)
|
||||
defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
|
||||
defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
|
||||
defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
|
||||
defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
|
||||
defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
|
||||
defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;
|
||||
|
||||
// The following are for instruction class (3V Diff)
|
||||
|
||||
|
@ -1674,3 +1674,17 @@ entry:
|
||||
%.fca.0.1.insert = insertvalue %struct.poly16x8x2_t %.fca.0.0.insert, <8 x i16> %vtrn1.i, 0, 1
|
||||
ret %struct.poly16x8x2_t %.fca.0.1.insert
|
||||
}
|
||||
|
||||
; De-interleave a single <16 x i8> input into two <8 x i8> results: both
; shuffles read only %y (their second operand is undef), the first selecting
; the even lanes 0,2,...,14 and the second the odd lanes 1,3,...,15.
define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) {
|
||||
; CHECK: test_uzp:
|
||||
|
||||
%vuzp.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
|
||||
%vuzp1.i = shufflevector <16 x i8> %y, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
|
||||
%.fca.0.0.insert = insertvalue %struct.uint8x8x2_t undef, <8 x i8> %vuzp.i, 0, 0
|
||||
%.fca.0.1.insert = insertvalue %struct.uint8x8x2_t %.fca.0.0.insert, <8 x i8> %vuzp1.i, 0, 1
|
||||
ret %struct.uint8x8x2_t %.fca.0.1.insert
|
||||
|
||||
; Expected code: a dup that copies element d[1] of the input into a D
; register, immediately followed by one uzp1 and one uzp2 on .8b operands.
; CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
|
||||
; CHECK-NEXT: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
; CHECK-NEXT: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user