Pull the VECTOR_SHUFFLE and BUILD_VECTOR lowering code out into separate

functions, which makes the code much cleaner :)


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27692 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2006-04-14 05:19:18 +00:00
parent 0fa07f95b6
commit f1b4708950

View File

@ -587,11 +587,166 @@ static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
return false;
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero.
uint64_t VectorBits[2];
uint64_t UndefBits[2];
if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
return SDOperand(); // Not a constant vector.
// See if this is all zeros.
if ((VectorBits[0] | VectorBits[1]) == 0) {
// Canonicalize all zero vectors to be v4i32.
if (Op.getValueType() != MVT::v4i32) {
SDOperand Z = DAG.getConstant(0, MVT::i32);
Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
}
return Op;
}
// Check to see if this is something we can use VSPLTI* to form.
MVT::ValueType CanonicalVT = MVT::Other;
SDNode *CST = 0;
if ((CST = PPC::get_VSPLTI_elt(Op.Val, 4, DAG).Val)) // vspltisw
CanonicalVT = MVT::v4i32;
else if ((CST = PPC::get_VSPLTI_elt(Op.Val, 2, DAG).Val)) // vspltish
CanonicalVT = MVT::v8i16;
else if ((CST = PPC::get_VSPLTI_elt(Op.Val, 1, DAG).Val)) // vspltisb
CanonicalVT = MVT::v16i8;
// If this matches one of the vsplti* patterns, force it to the canonical
// type for the pattern.
if (CST) {
if (Op.getValueType() != CanonicalVT) {
// Convert the splatted element to the right element type.
SDOperand Elt = DAG.getNode(ISD::TRUNCATE,
MVT::getVectorBaseType(CanonicalVT),
SDOperand(CST, 0));
std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
}
return Op;
}
// If this is some other splat of 4-byte elements, see if we can handle it
// in another way.
// FIXME: Make this more undef happy and work with other widths (1,2 bytes).
if (VectorBits[0] == VectorBits[1] &&
unsigned(VectorBits[0]) == unsigned(VectorBits[0] >> 32)) {
unsigned Bits = unsigned(VectorBits[0]);
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). These are important
// for fneg/fabs.
if (Bits == 0x80000000 || Bits == 0x7FFFFFFF) {
// Make -1 and vspltisw -1:
SDOperand OnesI = DAG.getConstant(~0U, MVT::i32);
SDOperand OnesV = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
OnesI, OnesI, OnesI, OnesI);
// Make the VSLW intrinsic, computing 0x8000_0000.
SDOperand Res
= DAG.getNode(ISD::INTRINSIC_WO_CHAIN, MVT::v4i32,
DAG.getConstant(Intrinsic::ppc_altivec_vslw, MVT::i32),
OnesV, OnesV);
// If this is 0x7FFF_FFFF, xor by OnesV to invert it.
if (Bits == 0x7FFFFFFF)
Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
}
}
return SDOperand();
}
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
/// lowered into a vperm.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
SDOperand V1 = Op.getOperand(0);
SDOperand V2 = Op.getOperand(1);
SDOperand PermMask = Op.getOperand(2);
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector.
if (V2.getOpcode() == ISD::UNDEF) {
if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
PPC::isSplatShuffleMask(PermMask.Val, 2) ||
PPC::isSplatShuffleMask(PermMask.Val, 4) ||
PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||
PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||
PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||
PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
return Op;
}
}
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||
PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||
PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||
PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
return Op;
// TODO: Handle more cases, and also handle cases that are cheaper to do as
// multiple such instructions than as a constant pool load/vperm pair.
// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
// vector that will get spilled to the constant pool.
if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
std::vector<SDOperand> ResultMask;
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
for (unsigned j = 0; j != BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
MVT::i8));
}
SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
}
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default: assert(0 && "Wasn't expecting to be able to lower this!");
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::FP_TO_SINT: {
assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
SDOperand Src = Op.getOperand(0);
@ -963,153 +1118,6 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
// Load it out.
return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL));
}
case ISD::BUILD_VECTOR: {
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, return Op
// or something simpler.
// If this is a vector of constants or undefs, get the bits. A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value. For undefs, the corresponding VectorBits values are
// zero.
uint64_t VectorBits[2];
uint64_t UndefBits[2];
if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
return SDOperand(); // Not a constant vector.
// See if this is all zeros.
if ((VectorBits[0] | VectorBits[1]) == 0) {
// Canonicalize all zero vectors to be v4i32.
if (Op.getValueType() != MVT::v4i32) {
SDOperand Z = DAG.getConstant(0, MVT::i32);
Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
}
return Op;
}
// Check to see if this is something we can use VSPLTI* to form.
MVT::ValueType CanonicalVT = MVT::Other;
SDNode *CST = 0;
if ((CST = PPC::get_VSPLTI_elt(Op.Val, 4, DAG).Val)) // vspltisw
CanonicalVT = MVT::v4i32;
else if ((CST = PPC::get_VSPLTI_elt(Op.Val, 2, DAG).Val)) // vspltish
CanonicalVT = MVT::v8i16;
else if ((CST = PPC::get_VSPLTI_elt(Op.Val, 1, DAG).Val)) // vspltisb
CanonicalVT = MVT::v16i8;
// If this matches one of the vsplti* patterns, force it to the canonical
// type for the pattern.
if (CST) {
if (Op.getValueType() != CanonicalVT) {
// Convert the splatted element to the right element type.
SDOperand Elt = DAG.getNode(ISD::TRUNCATE,
MVT::getVectorBaseType(CanonicalVT),
SDOperand(CST, 0));
std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
}
return Op;
}
// If this is some other splat of 4-byte elements, see if we can handle it
// in another way.
// FIXME: Make this more undef happy and work with other widths (1,2 bytes).
if (VectorBits[0] == VectorBits[1] &&
unsigned(VectorBits[0]) == unsigned(VectorBits[0] >> 32)) {
unsigned Bits = unsigned(VectorBits[0]);
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
// 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). These are important
// for fneg/fabs.
if (Bits == 0x80000000 || Bits == 0x7FFFFFFF) {
// Make -1 and vspltisw -1:
SDOperand OnesI = DAG.getConstant(~0U, MVT::i32);
SDOperand OnesV = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
OnesI, OnesI, OnesI, OnesI);
// Make the VSLW intrinsic, computing 0x8000_0000.
SDOperand Res
= DAG.getNode(ISD::INTRINSIC_WO_CHAIN, MVT::v4i32,
DAG.getConstant(Intrinsic::ppc_altivec_vslw, MVT::i32),
OnesV, OnesV);
// If this is 0x7FFF_FFFF, xor by OnesV to invert it.
if (Bits == 0x7FFFFFFF)
Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
}
}
return SDOperand();
}
case ISD::VECTOR_SHUFFLE: {
SDOperand V1 = Op.getOperand(0);
SDOperand V2 = Op.getOperand(1);
SDOperand PermMask = Op.getOperand(2);
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
// selected by the instruction selector.
if (V2.getOpcode() == ISD::UNDEF) {
if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
PPC::isSplatShuffleMask(PermMask.Val, 2) ||
PPC::isSplatShuffleMask(PermMask.Val, 4) ||
PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||
PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||
PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||
PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
return Op;
}
}
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||
PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||
PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||
PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
return Op;
// TODO: Handle more cases, and also handle cases that are cheaper to do as
// multiple such instructions than as a constant pool load/vperm pair.
// Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
// vector that will get spilled to the constant pool.
if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
std::vector<SDOperand> ResultMask;
for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
for (unsigned j = 0; j != BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
MVT::i8));
}
SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();