Mirror of https://github.com/c64scene-ar/llvm-6502.git, synced 2025-04-06 09:44:39 +00:00
During legalization, change Neon vdup_lane operations from shuffles to target-specific VDUPLANE nodes. This allows the subreg handling for the quad-register version to be done easily with Pats in the .td file, instead of with custom code in ARMISelDAGToDAG.cpp.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78993 91177308-0d34-0410-b5e6-96231b3b80d8
Parent: 75caad4f1a
Commit: 0ce3710825
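For readers unfamiliar with the operation being legalized, here is a minimal standalone sketch (plain C++, not LLVM or NEON intrinsic code; the function name is purely illustrative) of what a quad-register vdup_lane computes: one lane of a 64-bit source vector is replicated into every element of a 128-bit result.

#include <array>
#include <cstdint>

// Illustrative model of "vdup.16 qD, dN[lane]": splat the selected lane of a
// 4 x i16 (64-bit) source into all eight elements of a 128-bit result.
static std::array<int16_t, 8> vdupq_lane_i16(const std::array<int16_t, 4> &src,
                                             unsigned lane) {
  std::array<int16_t, 8> result{};
  for (int16_t &elt : result)
    elt = src[lane];   // every output element takes the selected source lane
  return result;
}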
@@ -36,9 +36,6 @@
 
 using namespace llvm;
 
-static const unsigned arm_dsubreg_0 = 5;
-static const unsigned arm_dsubreg_1 = 6;
-
 //===--------------------------------------------------------------------===//
 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
 /// instructions for SelectionDAG operations.
@@ -1307,44 +1304,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
                                   MVT::Other, Ops, 3);
   }
 
-  case ISD::VECTOR_SHUFFLE: {
-    EVT VT = Op.getValueType();
-
-    // Match 128-bit splat to VDUPLANEQ. (This could be done with a Pat in
-    // ARMInstrNEON.td but it is awkward because the shuffle mask needs to be
-    // transformed first into a lane number and then to both a subregister
-    // index and an adjusted lane number.) If the source operand is a
-    // SCALAR_TO_VECTOR, leave it so it will be matched later as a VDUP.
-    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-    if (VT.is128BitVector() && SVOp->isSplat() &&
-        Op.getOperand(0).getOpcode() != ISD::SCALAR_TO_VECTOR &&
-        Op.getOperand(1).getOpcode() == ISD::UNDEF) {
-      unsigned LaneVal = SVOp->getSplatIndex();
-
-      EVT HalfVT;
-      unsigned Opc = 0;
-      switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
-      default: llvm_unreachable("unhandled VDUP splat type");
-      case MVT::i8:  Opc = ARM::VDUPLN8q;  HalfVT = MVT::v8i8;  break;
-      case MVT::i16: Opc = ARM::VDUPLN16q; HalfVT = MVT::v4i16; break;
-      case MVT::i32: Opc = ARM::VDUPLN32q; HalfVT = MVT::v2i32; break;
-      case MVT::f32: Opc = ARM::VDUPLNfq;  HalfVT = MVT::v2f32; break;
-      }
-
-      // The source operand needs to be changed to a subreg of the original
-      // 128-bit operand, and the lane number needs to be adjusted accordingly.
-      unsigned NumElts = VT.getVectorNumElements() / 2;
-      unsigned SRVal = (LaneVal < NumElts ? arm_dsubreg_0 : arm_dsubreg_1);
-      SDValue SR = CurDAG->getTargetConstant(SRVal, MVT::i32);
-      SDValue NewLane = CurDAG->getTargetConstant(LaneVal % NumElts, MVT::i32);
-      SDNode *SubReg = CurDAG->getTargetNode(TargetInstrInfo::EXTRACT_SUBREG,
-                                             dl, HalfVT, N->getOperand(0), SR);
-      return CurDAG->SelectNodeTo(N, Opc, VT, SDValue(SubReg, 0), NewLane);
-    }
-
-    break;
-  }
-
   case ARMISD::VLD2D: {
     SDValue MemAddr, MemUpdate, MemOpc;
     if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
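The deleted block above is the custom selection code the commit message refers to: it had to turn a splat lane on a 128-bit vector into an explicit D-subregister index plus a lane number within that half. A standalone sketch of that arithmetic (plain C++, not LLVM code; the constants 5 and 6 are the arm_dsubreg_0/arm_dsubreg_1 values that were also deleted above):

#include <cassert>

struct SubRegLane {
  unsigned SubRegIdx;  // 5 = arm_dsubreg_0 (low half), 6 = arm_dsubreg_1 (high half)
  unsigned Lane;       // adjusted lane number within the selected 64-bit half
};

// Mirrors the removed code: NumQElts is the element count of the 128-bit
// vector, LaneVal the splat lane requested on that vector.
static SubRegLane splitQLane(unsigned LaneVal, unsigned NumQElts) {
  assert(LaneVal < NumQElts && "lane out of range");
  unsigned NumDElts = NumQElts / 2;                 // elements per D register
  return SubRegLane{LaneVal < NumDElts ? 5u : 6u,   // which D subregister
                    LaneVal % NumDElts};            // lane within that half
}

// Example: a v16i8 splat of lane 10 maps to arm_dsubreg_1 with lane 10 % 8 = 2.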
@@ -477,7 +477,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VQRSHRNsu:   return "ARMISD::VQRSHRNsu";
   case ARMISD::VGETLANEu:   return "ARMISD::VGETLANEu";
   case ARMISD::VGETLANEs:   return "ARMISD::VGETLANEs";
-  case ARMISD::VDUPLANEQ:   return "ARMISD::VDUPLANEQ";
+  case ARMISD::VDUPLANE:    return "ARMISD::VDUPLANE";
   case ARMISD::VLD2D:       return "ARMISD::VLD2D";
   case ARMISD::VLD3D:       return "ARMISD::VLD3D";
   case ARMISD::VLD4D:       return "ARMISD::VLD4D";
@@ -2447,6 +2447,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   // of inconsistencies between legalization and selection.
   // FIXME: floating-point vectors should be canonicalized to integer vectors
   // of the same time so that they get CSEd properly.
+  if (SVN->isSplat()) {
+    int Lane = SVN->getSplatIndex();
+    if (Lane != 0)
+      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, SVN->getOperand(0),
+                         DAG.getConstant(Lane, MVT::i32));
+  }
   if (isVREVMask(SVN, 64))
     return DAG.getNode(ARMISD::VREV64, dl, VT, SVN->getOperand(0));
   if (isVREVMask(SVN, 32))
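The lowering added above relies on SVN->isSplat() and SVN->getSplatIndex(). Roughly, a shuffle mask is a splat when every defined element selects the same source lane; the following standalone sketch (plain C++, not the actual ShuffleVectorSDNode implementation) shows the idea:

#include <vector>

// Returns true and sets Lane if every non-undef mask element (undef = -1)
// selects the same source lane, e.g. {2, 2, -1, 2} is a splat of lane 2.
static bool isSplatMask(const std::vector<int> &Mask, int &Lane) {
  Lane = -1;
  for (int Elt : Mask) {
    if (Elt < 0)
      continue;          // undef elements are compatible with any lane
    if (Lane < 0)
      Lane = Elt;        // the first defined element fixes the lane
    else if (Elt != Lane)
      return false;      // two different lanes: not a splat
  }
  return true;
}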
@@ -115,8 +115,8 @@ namespace llvm {
       VGETLANEu,    // zero-extend vector extract element
       VGETLANEs,    // sign-extend vector extract element
 
-      // Vector duplicate lane (128-bit result only; 64-bit is a shuffle)
-      VDUPLANEQ,    // splat a lane from a 64-bit vector to a 128-bit vector
+      // Vector duplicate lane:
+      VDUPLANE,
 
       // Vector load/store with (de)interleaving
       VLD2D,
@@ -65,8 +65,11 @@ def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
 def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
 def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
 
-def NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ",
-                           SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>;
+// VDUPLANE can produce a quad-register result from a double-register source,
+// so the result is not constrained to match the source.
+def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
+                          SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+                                               SDTCisVT<2, i32>]>>;
 
 def SDTARMVLD2 : SDTypeProfile<2, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
 def SDTARMVLD3 : SDTypeProfile<3, 1, [SDTCisSameAs<0, 1>,
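Concretely, the relaxed type profile lets the single NEONvduplane node cover both register widths: the VDUPLND pattern below uses it as (v4i16 (NEONvduplane (v4i16 DPR:$src), imm:$lane)), the VDUPLNQ pattern as (v8i16 (NEONvduplane (v4i16 DPR:$src), imm:$lane)), and legalization can also hand selection a quad-from-quad form such as (v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)), which the Pats added at the end of this diff rewrite through EXTRACT_SUBREG.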
@@ -1781,31 +1784,18 @@ def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
 
 // VDUP : Vector Duplicate Lane (from scalar to all elements)
 
-def SHUFFLE_get_splat_lane : SDNodeXForm<vector_shuffle, [{
-  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-  return CurDAG->getTargetConstant(SVOp->getSplatIndex(), MVT::i32);
-}]>;
-
-def splat_lane : PatFrag<(ops node:$lhs, node:$rhs),
-                         (vector_shuffle node:$lhs, node:$rhs), [{
-  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-  return SVOp->isSplat();
-}], SHUFFLE_get_splat_lane>;
-
 class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty>
   : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
         (outs DPR:$dst), (ins DPR:$src, lane_cst:$lane), NoItinerary,
         !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
-        [(set DPR:$dst, (Ty (splat_lane:$lane DPR:$src, undef)))]>;
+        [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
 
-// vector_shuffle requires that the source and destination types match, so
-// VDUP to a 128-bit result uses a target-specific VDUPLANEQ node.
 class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr,
               ValueType ResTy, ValueType OpTy>
   : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
         (outs QPR:$dst), (ins DPR:$src, lane_cst:$lane), NoItinerary,
         !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
-        [(set QPR:$dst, (ResTy (NEONvduplaneq (OpTy DPR:$src), imm:$lane)))]>;
+        [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;
 
 def VDUPLN8d : VDUPLND<0b00, 0b01, "vdup.8", v8i8>;
 def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>;
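As a concrete before/after example: a v4i16 splat of lane 2 used to reach instruction selection as a vector_shuffle with mask <2,2,2,2>, matched by the now-deleted splat_lane fragment with SHUFFLE_get_splat_lane recovering the lane index; it now arrives as (v4i16 (NEONvduplane (v4i16 DPR:$src), 2)) and is matched directly by the VDUPLN16d pattern.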
@@ -1816,6 +1806,23 @@ def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>;
 def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>;
 def VDUPLNfq  : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>;
 
+def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
+          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
+                                  (DSubReg_i8_reg imm:$lane))),
+                           (SubReg_i8_lane imm:$lane)))>;
+def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)),
+          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
+                                    (DSubReg_i16_reg imm:$lane))),
+                            (SubReg_i16_lane imm:$lane)))>;
+def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)),
+          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
+                                    (DSubReg_i32_reg imm:$lane))),
+                            (SubReg_i32_lane imm:$lane)))>;
+def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)),
+          (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src,
+                                   (DSubReg_i32_reg imm:$lane))),
+                           (SubReg_i32_lane imm:$lane)))>;
+
 def VDUPfdf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0,
                   (outs DPR:$dst), (ins SPR:$src),
                   NoItinerary, "vdup.32\t$dst, ${src:lane}", "",
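The Pats added above reproduce in TableGen the subregister/lane split that the deleted ARMISelDAGToDAG.cpp code performed by hand (see the earlier sketch): the DSubReg_*_reg transforms presumably select the low or high D subregister of the Q source, and the SubReg_*_lane transforms reduce the lane number modulo the element count of that 64-bit half. Those helper transforms are defined elsewhere in the ARM target description files and are not part of this diff.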