mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-11 16:37:42 +00:00
For NEON vectors with 32- or 64-bit elements, select BUILD_VECTORs and
VECTOR_SHUFFLEs to REG_SEQUENCE instructions. The standard ISD::BUILD_VECTOR node corresponds closely to REG_SEQUENCE but I couldn't use it here because its operands do not get legalized. That is pretty awful, but I guess it makes sense for other targets. Instead, I have added an ARM-specific version of BUILD_VECTOR that will have its operands properly legalized. This fixes the rest of Radar 7872877. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105439 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
da8dd92c9f
commit
40cbe7d5d4
@ -174,24 +174,17 @@ private:
|
||||
char ConstraintCode,
|
||||
std::vector<SDValue> &OutOps);
|
||||
|
||||
/// PairDRegs - Form a quad register from a pair of D registers.
|
||||
///
|
||||
// Form pairs of consecutive S, D, or Q registers.
|
||||
SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
|
||||
SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
|
||||
|
||||
/// PairDRegs - Form a quad register pair from a pair of Q registers.
|
||||
///
|
||||
SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
|
||||
|
||||
/// QuadDRegs - Form a quad register pair from a quad of D registers.
|
||||
///
|
||||
// Form sequences of 4 consecutive S, D, or Q registers.
|
||||
SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
|
||||
SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
|
||||
|
||||
/// QuadQRegs - Form 4 consecutive Q registers.
|
||||
///
|
||||
SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
|
||||
|
||||
/// OctoDRegs - Form 8 consecutive D registers.
|
||||
///
|
||||
// Form sequences of 8 consecutive D registers.
|
||||
SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
|
||||
SDValue V4, SDValue V5, SDValue V6, SDValue V7);
|
||||
};
|
||||
@ -963,6 +956,24 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/// PairSRegs - Form a D register from a pair of S registers.
|
||||
///
|
||||
SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
|
||||
DebugLoc dl = V0.getNode()->getDebugLoc();
|
||||
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
|
||||
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
|
||||
if (llvm::ModelWithRegSequence()) {
|
||||
const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
|
||||
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
|
||||
}
|
||||
SDValue Undef =
|
||||
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0);
|
||||
SDNode *Pair = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
|
||||
VT, Undef, V0, SubReg0);
|
||||
return CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
|
||||
VT, SDValue(Pair, 0), V1, SubReg1);
|
||||
}
|
||||
|
||||
/// PairDRegs - Form a quad register from a pair of D registers.
|
||||
///
|
||||
SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
|
||||
@ -991,6 +1002,19 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
|
||||
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
|
||||
}
|
||||
|
||||
/// QuadSRegs - Form 4 consecutive S registers.
|
||||
///
|
||||
SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
|
||||
SDValue V2, SDValue V3) {
|
||||
DebugLoc dl = V0.getNode()->getDebugLoc();
|
||||
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
|
||||
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
|
||||
SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
|
||||
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
|
||||
const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
|
||||
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
|
||||
}
|
||||
|
||||
/// QuadDRegs - Form 4 consecutive D registers.
|
||||
///
|
||||
SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
|
||||
@ -2214,6 +2238,22 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
|
||||
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
|
||||
}
|
||||
case ARMISD::BUILD_VECTOR: {
|
||||
EVT VecVT = N->getValueType(0);
|
||||
EVT EltVT = VecVT.getVectorElementType();
|
||||
unsigned NumElts = VecVT.getVectorNumElements();
|
||||
if (EltVT.getSimpleVT() == MVT::f64) {
|
||||
assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
|
||||
return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
|
||||
}
|
||||
assert(EltVT.getSimpleVT() == MVT::f32 &&
|
||||
"unexpected type for BUILD_VECTOR");
|
||||
if (NumElts == 2)
|
||||
return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
|
||||
assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
|
||||
return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
|
||||
N->getOperand(2), N->getOperand(3));
|
||||
}
|
||||
|
||||
case ISD::INTRINSIC_VOID:
|
||||
case ISD::INTRINSIC_W_CHAIN: {
|
||||
|
@ -591,6 +591,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case ARMISD::VZIP: return "ARMISD::VZIP";
|
||||
case ARMISD::VUZP: return "ARMISD::VUZP";
|
||||
case ARMISD::VTRN: return "ARMISD::VTRN";
|
||||
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
|
||||
case ARMISD::FMAX: return "ARMISD::FMAX";
|
||||
case ARMISD::FMIN: return "ARMISD::FMIN";
|
||||
}
|
||||
@ -3121,21 +3122,17 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
|
||||
return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
|
||||
|
||||
// Vectors with 32- or 64-bit elements can be built by directly assigning
|
||||
// the subregisters.
|
||||
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
|
||||
// will be legalized.
|
||||
if (EltSize >= 32) {
|
||||
// Do the expansion with floating-point types, since that is what the VFP
|
||||
// registers are defined to use, and since i64 is not legal.
|
||||
EVT EltVT = EVT::getFloatingPointVT(EltSize);
|
||||
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
|
||||
SDValue Val = DAG.getUNDEF(VecVT);
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
SDValue Elt = Op.getOperand(i);
|
||||
if (Elt.getOpcode() == ISD::UNDEF)
|
||||
continue;
|
||||
Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt);
|
||||
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt,
|
||||
DAG.getConstant(i, MVT::i32));
|
||||
}
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
for (unsigned i = 0; i < NumElts; ++i)
|
||||
Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i)));
|
||||
SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
|
||||
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
|
||||
}
|
||||
|
||||
@ -3346,7 +3343,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
|
||||
}
|
||||
|
||||
// Implement shuffles with 32- or 64-bit elements as subreg copies.
|
||||
// Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
|
||||
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
|
||||
if (EltSize >= 32) {
|
||||
// Do the expansion with floating-point types, since that is what the VFP
|
||||
@ -3355,17 +3352,17 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
|
||||
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
|
||||
V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
|
||||
SDValue Val = DAG.getUNDEF(VecVT);
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
if (ShuffleMask[i] < 0)
|
||||
continue;
|
||||
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
|
||||
ShuffleMask[i] < (int)NumElts ? V1 : V2,
|
||||
DAG.getConstant(ShuffleMask[i] & (NumElts-1),
|
||||
MVT::i32));
|
||||
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val,
|
||||
Elt, DAG.getConstant(i, MVT::i32));
|
||||
Ops.push_back(DAG.getUNDEF(EltVT));
|
||||
else
|
||||
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
|
||||
ShuffleMask[i] < (int)NumElts ? V1 : V2,
|
||||
DAG.getConstant(ShuffleMask[i] & (NumElts-1),
|
||||
MVT::i32)));
|
||||
}
|
||||
SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
|
||||
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
|
||||
}
|
||||
|
||||
|
@ -135,6 +135,13 @@ namespace llvm {
|
||||
VUZP, // unzip (deinterleave)
|
||||
VTRN, // transpose
|
||||
|
||||
// Operands of the standard BUILD_VECTOR node are not legalized, which
|
||||
// is fine if BUILD_VECTORs are always lowered to shuffles or other
|
||||
// operations, but for ARM some BUILD_VECTORs are legal as-is and their
|
||||
// operands need to be legalized. Define an ARM-specific version of
|
||||
// BUILD_VECTOR for this purpose.
|
||||
BUILD_VECTOR,
|
||||
|
||||
// Floating-point max and min:
|
||||
FMAX,
|
||||
FMIN
|
||||
|
Loading…
x
Reference in New Issue
Block a user