For NEON vectors with 32- or 64-bit elements, select BUILD_VECTORs and

VECTOR_SHUFFLEs to REG_SEQUENCE instructions.  The standard ISD::BUILD_VECTOR
node corresponds closely to REG_SEQUENCE but I couldn't use it here because
its operands do not get legalized.  That is pretty awful, but I guess it
makes sense for other targets.  Instead, I have added an ARM-specific version
of BUILD_VECTOR that will have its operands properly legalized.
This fixes the rest of Radar 7872877.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105439 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bob Wilson 2010-06-04 00:04:02 +00:00
parent da8dd92c9f
commit 40cbe7d5d4
3 changed files with 75 additions and 31 deletions

View File

@ -174,24 +174,17 @@ private:
char ConstraintCode,
std::vector<SDValue> &OutOps);
/// PairDRegs - Form a quad register from a pair of D registers.
///
// Form pairs of consecutive S, D, or Q registers.
SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
/// PairDRegs - Form a quad register pair from a pair of Q registers.
///
SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
/// QuadDRegs - Form a quad register pair from a quad of D registers.
///
// Form sequences of 4 consecutive S, D, or Q registers.
SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
/// QuadQRegs - Form 4 consecutive Q registers.
///
SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
/// OctoDRegs - Form 8 consecutive D registers.
///
// Form sequences of 8 consecutive D registers.
SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
SDValue V4, SDValue V5, SDValue V6, SDValue V7);
};
@ -963,6 +956,24 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
return NULL;
}
/// PairSRegs - Form a D register from a pair of S registers.
///
SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
if (llvm::ModelWithRegSequence()) {
const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
}
SDValue Undef =
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0);
SDNode *Pair = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
VT, Undef, V0, SubReg0);
return CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
VT, SDValue(Pair, 0), V1, SubReg1);
}
/// PairDRegs - Form a quad register from a pair of D registers.
///
SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
@ -991,6 +1002,19 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
}
/// QuadSRegs - Form 4 consecutive S registers.
///
SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
}
/// QuadDRegs - Form 4 consecutive D registers.
///
SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
@ -2214,6 +2238,22 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
}
case ARMISD::BUILD_VECTOR: {
EVT VecVT = N->getValueType(0);
EVT EltVT = VecVT.getVectorElementType();
unsigned NumElts = VecVT.getVectorNumElements();
if (EltVT.getSimpleVT() == MVT::f64) {
assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
}
assert(EltVT.getSimpleVT() == MVT::f32 &&
"unexpected type for BUILD_VECTOR");
if (NumElts == 2)
return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3));
}
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {

View File

@ -591,6 +591,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
}
@ -3121,21 +3122,17 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
// Vectors with 32- or 64-bit elements can be built by directly assigning
// the subregisters.
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
// will be legalized.
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
// registers are defined to use, and since i64 is not legal.
EVT EltVT = EVT::getFloatingPointVT(EltSize);
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
SDValue Val = DAG.getUNDEF(VecVT);
for (unsigned i = 0; i < NumElts; ++i) {
SDValue Elt = Op.getOperand(i);
if (Elt.getOpcode() == ISD::UNDEF)
continue;
Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt);
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt,
DAG.getConstant(i, MVT::i32));
}
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i)));
SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}
@ -3346,7 +3343,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
// Implement shuffles with 32- or 64-bit elements as subreg copies.
// Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
@ -3355,17 +3352,17 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
SDValue Val = DAG.getUNDEF(VecVT);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i) {
if (ShuffleMask[i] < 0)
continue;
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
ShuffleMask[i] < (int)NumElts ? V1 : V2,
DAG.getConstant(ShuffleMask[i] & (NumElts-1),
MVT::i32));
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val,
Elt, DAG.getConstant(i, MVT::i32));
Ops.push_back(DAG.getUNDEF(EltVT));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
ShuffleMask[i] < (int)NumElts ? V1 : V2,
DAG.getConstant(ShuffleMask[i] & (NumElts-1),
MVT::i32)));
}
SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}

View File

@ -135,6 +135,13 @@ namespace llvm {
VUZP, // unzip (deinterleave)
VTRN, // transpose
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
// operations, but for ARM some BUILD_VECTORs are legal as-is and their
// operands need to be legalized. Define an ARM-specific version of
// BUILD_VECTOR for this purpose.
BUILD_VECTOR,
// Floating-point max and min:
FMAX,
FMIN